upstream/mercurial-mirror Commit - r30829:08b34c3a

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import collections

16

import collections

17

import errno

17

import errno

18

import hashlib

18

import hashlib

19

import os

19

import os

20

import struct

20

import struct

21

import zlib

21

import zlib

22

23

# import stuff from node for others to import from revlog

23

# import stuff from node for others to import from revlog

24

from .node import (

24

from .node import (

25

bin,

25

bin,

26

hex,

26

hex,

27

nullid,

27

nullid,

28

nullrev,

28

nullrev,

29

)

29

)

30

from .i18n import _

30

from .i18n import _

31

from . import (

31

from . import (

32

ancestor,

32

ancestor,

33

error,

33

error,

34

mdiff,

34

mdiff,

35

parsers,

35

parsers,

36

templatefilters,

36

templatefilters,

37

util,

37

util,

38

)

38

)

39

40

_pack = struct.pack

40

_pack = struct.pack

41

_unpack = struct.unpack

41

_unpack = struct.unpack

42

# Aliased for performance.

42

# Aliased for performance.

43

_zlibdecompress = zlib.decompress

43

_zlibdecompress = zlib.decompress

44

45

# revlog header flags

45

# revlog header flags

46

REVLOGV0 = 0

46

REVLOGV0 = 0

47

REVLOGNG = 1

47

REVLOGNG = 1

48

REVLOGNGINLINEDATA = (1 << 16)

48

REVLOGNGINLINEDATA = (1 << 16)

49

REVLOGGENERALDELTA = (1 << 17)

49

REVLOGGENERALDELTA = (1 << 17)

50

REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

50

REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

51

REVLOG_DEFAULT_FORMAT = REVLOGNG

51

REVLOG_DEFAULT_FORMAT = REVLOGNG

52

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

52

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

53

REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

53

REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

54

55

# revlog index flags

55

# revlog index flags

56

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

56

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

57

REVIDX_EXTSTORED = (1 << 14) # revision data is stored externally

57

REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)

58

REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally

58

REVIDX_DEFAULT_FLAGS = 0

59

REVIDX_DEFAULT_FLAGS = 0

59

# stable order in which flags need to be processed and their processors applied

60

# stable order in which flags need to be processed and their processors applied

60

REVIDX_FLAGS_ORDER = [

61

REVIDX_FLAGS_ORDER = [

61

REVIDX_ISCENSORED,

62

REVIDX_ISCENSORED,

63

REVIDX_ELLIPSIS,

62

REVIDX_EXTSTORED,

64

REVIDX_EXTSTORED,

63

]

65

]

64

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

66

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

65

67

66

# max size of revlog with inline data

68

# max size of revlog with inline data

67

_maxinline = 131072

69

_maxinline = 131072

68

_chunksize = 1048576

70

_chunksize = 1048576

69

71

70

RevlogError = error.RevlogError

72

RevlogError = error.RevlogError

71

LookupError = error.LookupError

73

LookupError = error.LookupError

72

CensoredNodeError = error.CensoredNodeError

74

CensoredNodeError = error.CensoredNodeError

73

ProgrammingError = error.ProgrammingError

75

ProgrammingError = error.ProgrammingError

74

76

75

# Store flag processors (cf. 'addflagprocessor()' to register)

77

# Store flag processors (cf. 'addflagprocessor()' to register)

76

_flagprocessors = {

78

_flagprocessors = {

77

REVIDX_ISCENSORED: None,

79

REVIDX_ISCENSORED: None,

78

}

80

}

79

81

80

def addflagprocessor(flag, processor):

82

def addflagprocessor(flag, processor):

81

"""Register a flag processor on a revision data flag.

83

"""Register a flag processor on a revision data flag.

82

84

83

Invariant:

85

Invariant:

84

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

86

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

85

- Only one flag processor can be registered on a specific flag.

87

- Only one flag processor can be registered on a specific flag.

86

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

88

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

87

following signatures:

89

following signatures:

88

- (read) f(self, text) -> newtext, bool

90

- (read) f(self, text) -> newtext, bool

89

- (write) f(self, text) -> newtext, bool

91

- (write) f(self, text) -> newtext, bool

90

- (raw) f(self, text) -> bool

92

- (raw) f(self, text) -> bool

91

The boolean returned by these transforms is used to determine whether

93

The boolean returned by these transforms is used to determine whether

92

'newtext' can be used for hash integrity checking.

94

'newtext' can be used for hash integrity checking.

93

95

94

Note: The 'raw' transform is used for changegroup generation and in some

96

Note: The 'raw' transform is used for changegroup generation and in some

95

debug commands. In this case the transform only indicates whether the

97

debug commands. In this case the transform only indicates whether the

96

contents can be used for hash integrity checks.

98

contents can be used for hash integrity checks.

97

"""

99

"""

98

if not flag & REVIDX_KNOWN_FLAGS:

100

if not flag & REVIDX_KNOWN_FLAGS:

99

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

101

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

100

raise ProgrammingError(msg)

102

raise ProgrammingError(msg)

101

if flag not in REVIDX_FLAGS_ORDER:

103

if flag not in REVIDX_FLAGS_ORDER:

102

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

104

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

103

raise ProgrammingError(msg)

105

raise ProgrammingError(msg)

104

if flag in _flagprocessors:

106

if flag in _flagprocessors:

105

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

107

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

106

raise error.Abort(msg)

108

raise error.Abort(msg)

107

_flagprocessors[flag] = processor

109

_flagprocessors[flag] = processor

108

110

109

def getoffset(q):

111

def getoffset(q):

110

return int(q >> 16)

112

return int(q >> 16)

111

113

112

def gettype(q):

114

def gettype(q):

113

return int(q & 0xFFFF)

115

return int(q & 0xFFFF)

114

116

115

def offset_type(offset, type):

117

def offset_type(offset, type):

116

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

118

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

117

raise ValueError('unknown revlog index flags')

119

raise ValueError('unknown revlog index flags')

118

return long(long(offset) << 16 | type)

120

return long(long(offset) << 16 | type)

119

121

120

_nullhash = hashlib.sha1(nullid)

122

_nullhash = hashlib.sha1(nullid)

121

123

122

def hash(text, p1, p2):

124

def hash(text, p1, p2):

123

"""generate a hash from the given text and its parent hashes

125

"""generate a hash from the given text and its parent hashes

124

126

125

This hash combines both the current file contents and its history

127

This hash combines both the current file contents and its history

126

in a manner that makes it easy to distinguish nodes with the same

128

in a manner that makes it easy to distinguish nodes with the same

127

content in the revision graph.

129

content in the revision graph.

128

"""

130

"""

129

# As of now, if one of the parent node is null, p2 is null

131

# As of now, if one of the parent node is null, p2 is null

130

if p2 == nullid:

132

if p2 == nullid:

131

# deep copy of a hash is faster than creating one

133

# deep copy of a hash is faster than creating one

132

s = _nullhash.copy()

134

s = _nullhash.copy()

133

s.update(p1)

135

s.update(p1)

134

else:

136

else:

135

# none of the parent nodes are nullid

137

# none of the parent nodes are nullid

136

l = [p1, p2]

138

l = [p1, p2]

137

l.sort()

139

l.sort()

138

s = hashlib.sha1(l[0])

140

s = hashlib.sha1(l[0])

139

s.update(l[1])

141

s.update(l[1])

140

s.update(text)

142

s.update(text)

141

return s.digest()

143

return s.digest()

142

144

143

# index v0:

145

# index v0:

144

# 4 bytes: offset

146

# 4 bytes: offset

145

# 4 bytes: compressed length

147

# 4 bytes: compressed length

146

# 4 bytes: base rev

148

# 4 bytes: base rev

147

# 4 bytes: link rev

149

# 4 bytes: link rev

148

# 20 bytes: parent 1 nodeid

150

# 20 bytes: parent 1 nodeid

149

# 20 bytes: parent 2 nodeid

151

# 20 bytes: parent 2 nodeid

150

# 20 bytes: nodeid

152

# 20 bytes: nodeid

151

indexformatv0 = ">4l20s20s20s"

153

indexformatv0 = ">4l20s20s20s"

152

154

153

class revlogoldio(object):

155

class revlogoldio(object):

154

def __init__(self):

156

def __init__(self):

155

self.size = struct.calcsize(indexformatv0)

157

self.size = struct.calcsize(indexformatv0)

156

158

157

def parseindex(self, data, inline):

159

def parseindex(self, data, inline):

158

s = self.size

160

s = self.size

159

index = []

161

index = []

160

nodemap = {nullid: nullrev}

162

nodemap = {nullid: nullrev}

161

n = off = 0

163

n = off = 0

162

l = len(data)

164

l = len(data)

163

while off + s <= l:

165

while off + s <= l:

164

cur = data[off:off + s]

166

cur = data[off:off + s]

165

off += s

167

off += s

166

e = _unpack(indexformatv0, cur)

168

e = _unpack(indexformatv0, cur)

167

# transform to revlogv1 format

169

# transform to revlogv1 format

168

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

170

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

169

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

171

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

170

index.append(e2)

172

index.append(e2)

171

nodemap[e[6]] = n

173

nodemap[e[6]] = n

172

n += 1

174

n += 1

173

175

174

# add the magic null revision at -1

176

# add the magic null revision at -1

175

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

177

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

176

178

177

return index, nodemap, None

179

return index, nodemap, None

178

180

179

def packentry(self, entry, node, version, rev):

181

def packentry(self, entry, node, version, rev):

180

if gettype(entry[0]):

182

if gettype(entry[0]):

181

raise RevlogError(_("index entry flags need RevlogNG"))

183

raise RevlogError(_("index entry flags need RevlogNG"))

182

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

184

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

183

node(entry[5]), node(entry[6]), entry[7])

185

node(entry[5]), node(entry[6]), entry[7])

184

return _pack(indexformatv0, *e2)

186

return _pack(indexformatv0, *e2)

185

187

186

# index ng:

188

# index ng:

187

# 6 bytes: offset

189

# 6 bytes: offset

188

# 2 bytes: flags

190

# 2 bytes: flags

189

# 4 bytes: compressed length

191

# 4 bytes: compressed length

190

# 4 bytes: uncompressed length

192

# 4 bytes: uncompressed length

191

# 4 bytes: base rev

193

# 4 bytes: base rev

192

# 4 bytes: link rev

194

# 4 bytes: link rev

193

# 4 bytes: parent 1 rev

195

# 4 bytes: parent 1 rev

194

# 4 bytes: parent 2 rev

196

# 4 bytes: parent 2 rev

195

# 32 bytes: nodeid

197

# 32 bytes: nodeid

196

indexformatng = ">Qiiiiii20s12x"

198

indexformatng = ">Qiiiiii20s12x"

197

versionformat = ">I"

199

versionformat = ">I"

198

200

199

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

201

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

200

# signed integer)

202

# signed integer)

201

_maxentrysize = 0x7fffffff

203

_maxentrysize = 0x7fffffff

202

204

203

class revlogio(object):

205

class revlogio(object):

204

def __init__(self):

206

def __init__(self):

205

self.size = struct.calcsize(indexformatng)

207

self.size = struct.calcsize(indexformatng)

206

208

207

def parseindex(self, data, inline):

209

def parseindex(self, data, inline):

208

# call the C implementation to parse the index data

210

# call the C implementation to parse the index data

209

index, cache = parsers.parse_index2(data, inline)

211

index, cache = parsers.parse_index2(data, inline)

210

return index, getattr(index, 'nodemap', None), cache

212

return index, getattr(index, 'nodemap', None), cache

211

213

212

def packentry(self, entry, node, version, rev):

214

def packentry(self, entry, node, version, rev):

213

p = _pack(indexformatng, *entry)

215

p = _pack(indexformatng, *entry)

214

if rev == 0:

216

if rev == 0:

215

p = _pack(versionformat, version) + p[4:]

217

p = _pack(versionformat, version) + p[4:]

216

return p

218

return p

217

219

218

class revlog(object):

220

class revlog(object):

219

"""

221

"""

220

the underlying revision storage object

222

the underlying revision storage object

221

223

222

A revlog consists of two parts, an index and the revision data.

224

A revlog consists of two parts, an index and the revision data.

223

225

224

The index is a file with a fixed record size containing

226

The index is a file with a fixed record size containing

225

information on each revision, including its nodeid (hash), the

227

information on each revision, including its nodeid (hash), the

226

nodeids of its parents, the position and offset of its data within

228

nodeids of its parents, the position and offset of its data within

227

the data file, and the revision it's based on. Finally, each entry

229

the data file, and the revision it's based on. Finally, each entry

228

contains a linkrev entry that can serve as a pointer to external

230

contains a linkrev entry that can serve as a pointer to external

229

data.

231

data.

230

232

231

The revision data itself is a linear collection of data chunks.

233

The revision data itself is a linear collection of data chunks.

232

Each chunk represents a revision and is usually represented as a

234

Each chunk represents a revision and is usually represented as a

233

delta against the previous chunk. To bound lookup time, runs of

235

delta against the previous chunk. To bound lookup time, runs of

234

deltas are limited to about 2 times the length of the original

236

deltas are limited to about 2 times the length of the original

235

version data. This makes retrieval of a version proportional to

237

version data. This makes retrieval of a version proportional to

236

its size, or O(1) relative to the number of revisions.

238

its size, or O(1) relative to the number of revisions.

237

239

238

Both pieces of the revlog are written to in an append-only

240

Both pieces of the revlog are written to in an append-only

239

fashion, which means we never need to rewrite a file to insert or

241

fashion, which means we never need to rewrite a file to insert or

240

remove data, and can use some simple techniques to avoid the need

242

remove data, and can use some simple techniques to avoid the need

241

for locking while reading.

243

for locking while reading.

242

244

243

If checkambig, indexfile is opened with checkambig=True at

245

If checkambig, indexfile is opened with checkambig=True at

244

writing, to avoid file stat ambiguity.

246

writing, to avoid file stat ambiguity.

245

"""

247

"""

246

def __init__(self, opener, indexfile, checkambig=False):

248

def __init__(self, opener, indexfile, checkambig=False):

247

"""

249

"""

248

create a revlog object

250

create a revlog object

249

251

250

opener is a function that abstracts the file opening operation

252

opener is a function that abstracts the file opening operation

251

and can be used to implement COW semantics or the like.

253

and can be used to implement COW semantics or the like.

252

"""

254

"""

253

self.indexfile = indexfile

255

self.indexfile = indexfile

254

self.datafile = indexfile[:-2] + ".d"

256

self.datafile = indexfile[:-2] + ".d"

255

self.opener = opener

257

self.opener = opener

256

# When True, indexfile is opened with checkambig=True at writing, to

258

# When True, indexfile is opened with checkambig=True at writing, to

257

# avoid file stat ambiguity.

259

# avoid file stat ambiguity.

258

self._checkambig = checkambig

260

self._checkambig = checkambig

259

# 3-tuple of (node, rev, text) for a raw revision.

261

# 3-tuple of (node, rev, text) for a raw revision.

260

self._cache = None

262

self._cache = None

261

# Maps rev to chain base rev.

263

# Maps rev to chain base rev.

262

self._chainbasecache = util.lrucachedict(100)

264

self._chainbasecache = util.lrucachedict(100)

263

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

265

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

264

self._chunkcache = (0, '')

266

self._chunkcache = (0, '')

265

# How much data to read and cache into the raw revlog data cache.

267

# How much data to read and cache into the raw revlog data cache.

266

self._chunkcachesize = 65536

268

self._chunkcachesize = 65536

267

self._maxchainlen = None

269

self._maxchainlen = None

268

self._aggressivemergedeltas = False

270

self._aggressivemergedeltas = False

269

self.index = []

271

self.index = []

270

# Mapping of partial identifiers to full nodes.

272

# Mapping of partial identifiers to full nodes.

271

self._pcache = {}

273

self._pcache = {}

272

# Mapping of revision integer to full node.

274

# Mapping of revision integer to full node.

273

self._nodecache = {nullid: nullrev}

275

self._nodecache = {nullid: nullrev}

274

self._nodepos = None

276

self._nodepos = None

275

self._compengine = 'zlib'

277

self._compengine = 'zlib'

276

278

277

v = REVLOG_DEFAULT_VERSION

279

v = REVLOG_DEFAULT_VERSION

278

opts = getattr(opener, 'options', None)

280

opts = getattr(opener, 'options', None)

279

if opts is not None:

281

if opts is not None:

280

if 'revlogv1' in opts:

282

if 'revlogv1' in opts:

281

if 'generaldelta' in opts:

283

if 'generaldelta' in opts:

282

v |= REVLOGGENERALDELTA

284

v |= REVLOGGENERALDELTA

283

else:

285

else:

284

v = 0

286

v = 0

285

if 'chunkcachesize' in opts:

287

if 'chunkcachesize' in opts:

286

self._chunkcachesize = opts['chunkcachesize']

288

self._chunkcachesize = opts['chunkcachesize']

287

if 'maxchainlen' in opts:

289

if 'maxchainlen' in opts:

288

self._maxchainlen = opts['maxchainlen']

290

self._maxchainlen = opts['maxchainlen']

289

if 'aggressivemergedeltas' in opts:

291

if 'aggressivemergedeltas' in opts:

290

self._aggressivemergedeltas = opts['aggressivemergedeltas']

292

self._aggressivemergedeltas = opts['aggressivemergedeltas']

291

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

293

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

292

if 'compengine' in opts:

294

if 'compengine' in opts:

293

self._compengine = opts['compengine']

295

self._compengine = opts['compengine']

294

296

295

if self._chunkcachesize <= 0:

297

if self._chunkcachesize <= 0:

296

raise RevlogError(_('revlog chunk cache size %r is not greater '

298

raise RevlogError(_('revlog chunk cache size %r is not greater '

297

'than 0') % self._chunkcachesize)

299

'than 0') % self._chunkcachesize)

298

elif self._chunkcachesize & (self._chunkcachesize - 1):

300

elif self._chunkcachesize & (self._chunkcachesize - 1):

299

raise RevlogError(_('revlog chunk cache size %r is not a power '

301

raise RevlogError(_('revlog chunk cache size %r is not a power '

300

'of 2') % self._chunkcachesize)

302

'of 2') % self._chunkcachesize)

301

303

302

indexdata = ''

304

indexdata = ''

303

self._initempty = True

305

self._initempty = True

304

try:

306

try:

305

f = self.opener(self.indexfile)

307

f = self.opener(self.indexfile)

306

indexdata = f.read()

308

indexdata = f.read()

307

f.close()

309

f.close()

308

if len(indexdata) > 0:

310

if len(indexdata) > 0:

309

v = struct.unpack(versionformat, indexdata[:4])[0]

311

v = struct.unpack(versionformat, indexdata[:4])[0]

310

self._initempty = False

312

self._initempty = False

311

except IOError as inst:

313

except IOError as inst:

312

if inst.errno != errno.ENOENT:

314

if inst.errno != errno.ENOENT:

313

raise

315

raise

314

316

315

self.version = v

317

self.version = v

316

self._inline = v & REVLOGNGINLINEDATA

318

self._inline = v & REVLOGNGINLINEDATA

317

self._generaldelta = v & REVLOGGENERALDELTA

319

self._generaldelta = v & REVLOGGENERALDELTA

318

flags = v & ~0xFFFF

320

flags = v & ~0xFFFF

319

fmt = v & 0xFFFF

321

fmt = v & 0xFFFF

320

if fmt == REVLOGV0 and flags:

322

if fmt == REVLOGV0 and flags:

321

raise RevlogError(_("index %s unknown flags %#04x for format v0")

323

raise RevlogError(_("index %s unknown flags %#04x for format v0")

322

% (self.indexfile, flags >> 16))

324

% (self.indexfile, flags >> 16))

323

elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:

325

elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:

324

raise RevlogError(_("index %s unknown flags %#04x for revlogng")

326

raise RevlogError(_("index %s unknown flags %#04x for revlogng")

325

% (self.indexfile, flags >> 16))

327

% (self.indexfile, flags >> 16))

326

elif fmt > REVLOGNG:

328

elif fmt > REVLOGNG:

327

raise RevlogError(_("index %s unknown format %d")

329

raise RevlogError(_("index %s unknown format %d")

328

% (self.indexfile, fmt))

330

% (self.indexfile, fmt))

329

331

330

self.storedeltachains = True

332

self.storedeltachains = True

331

333

332

self._io = revlogio()

334

self._io = revlogio()

333

if self.version == REVLOGV0:

335

if self.version == REVLOGV0:

334

self._io = revlogoldio()

336

self._io = revlogoldio()

335

try:

337

try:

336

d = self._io.parseindex(indexdata, self._inline)

338

d = self._io.parseindex(indexdata, self._inline)

337

except (ValueError, IndexError):

339

except (ValueError, IndexError):

338

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

340

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

339

self.index, nodemap, self._chunkcache = d

341

self.index, nodemap, self._chunkcache = d

340

if nodemap is not None:

342

if nodemap is not None:

341

self.nodemap = self._nodecache = nodemap

343

self.nodemap = self._nodecache = nodemap

342

if not self._chunkcache:

344

if not self._chunkcache:

343

self._chunkclear()

345

self._chunkclear()

344

# revnum -> (chain-length, sum-delta-length)

346

# revnum -> (chain-length, sum-delta-length)

345

self._chaininfocache = {}

347

self._chaininfocache = {}

346

# revlog header -> revlog compressor

348

# revlog header -> revlog compressor

347

self._decompressors = {}

349

self._decompressors = {}

348

350

349

@util.propertycache

351

@util.propertycache

350

def _compressor(self):

352

def _compressor(self):

351

return util.compengines[self._compengine].revlogcompressor()

353

return util.compengines[self._compengine].revlogcompressor()

352

354

353

def tip(self):

355

def tip(self):

354

return self.node(len(self.index) - 2)

356

return self.node(len(self.index) - 2)

355

def __contains__(self, rev):

357

def __contains__(self, rev):

356

return 0 <= rev < len(self)

358

return 0 <= rev < len(self)

357

def __len__(self):

359

def __len__(self):

358

return len(self.index) - 1

360

return len(self.index) - 1

359

def __iter__(self):

361

def __iter__(self):

360

return iter(xrange(len(self)))

362

return iter(xrange(len(self)))

361

def revs(self, start=0, stop=None):

363

def revs(self, start=0, stop=None):

362

"""iterate over all rev in this revlog (from start to stop)"""

364

"""iterate over all rev in this revlog (from start to stop)"""

363

step = 1

365

step = 1

364

if stop is not None:

366

if stop is not None:

365

if start > stop:

367

if start > stop:

366

step = -1

368

step = -1

367

stop += step

369

stop += step

368

else:

370

else:

369

stop = len(self)

371

stop = len(self)

370

return xrange(start, stop, step)

372

return xrange(start, stop, step)

371

373

372

@util.propertycache

374

@util.propertycache

373

def nodemap(self):

375

def nodemap(self):

374

self.rev(self.node(0))

376

self.rev(self.node(0))

375

return self._nodecache

377

return self._nodecache

376

378

377

def hasnode(self, node):

379

def hasnode(self, node):

378

try:

380

try:

379

self.rev(node)

381

self.rev(node)

380

return True

382

return True

381

except KeyError:

383

except KeyError:

382

return False

384

return False

383

385

384

def clearcaches(self):

386

def clearcaches(self):

385

self._cache = None

387

self._cache = None

386

self._chainbasecache.clear()

388

self._chainbasecache.clear()

387

self._chunkcache = (0, '')

389

self._chunkcache = (0, '')

388

self._pcache = {}

390

self._pcache = {}

389

391

390

try:

392

try:

391

self._nodecache.clearcaches()

393

self._nodecache.clearcaches()

392

except AttributeError:

394

except AttributeError:

393

self._nodecache = {nullid: nullrev}

395

self._nodecache = {nullid: nullrev}

394

self._nodepos = None

396

self._nodepos = None

395

397

396

def rev(self, node):

398

def rev(self, node):

397

try:

399

try:

398

return self._nodecache[node]

400

return self._nodecache[node]

399

except TypeError:

401

except TypeError:

400

raise

402

raise

401

except RevlogError:

403

except RevlogError:

402

# parsers.c radix tree lookup failed

404

# parsers.c radix tree lookup failed

403

raise LookupError(node, self.indexfile, _('no node'))

405

raise LookupError(node, self.indexfile, _('no node'))

404

except KeyError:

406

except KeyError:

405

# pure python cache lookup failed

407

# pure python cache lookup failed

406

n = self._nodecache

408

n = self._nodecache

407

i = self.index

409

i = self.index

408

p = self._nodepos

410

p = self._nodepos

409

if p is None:

411

if p is None:

410

p = len(i) - 2

412

p = len(i) - 2

411

for r in xrange(p, -1, -1):

413

for r in xrange(p, -1, -1):

412

v = i[r][7]

414

v = i[r][7]

413

n[v] = r

415

n[v] = r

414

if v == node:

416

if v == node:

415

self._nodepos = r - 1

417

self._nodepos = r - 1

416

return r

418

return r

417

raise LookupError(node, self.indexfile, _('no node'))

419

raise LookupError(node, self.indexfile, _('no node'))

418

420

419

# Accessors for index entries.

421

# Accessors for index entries.

420

422

421

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

423

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

422

# are flags.

424

# are flags.

423

def start(self, rev):

425

def start(self, rev):

424

return int(self.index[rev][0] >> 16)

426

return int(self.index[rev][0] >> 16)

425

427

426

def flags(self, rev):

428

def flags(self, rev):

427

return self.index[rev][0] & 0xFFFF

429

return self.index[rev][0] & 0xFFFF

428

430

429

def length(self, rev):

431

def length(self, rev):

430

return self.index[rev][1]

432

return self.index[rev][1]

431

433

432

def rawsize(self, rev):

434

def rawsize(self, rev):

433

"""return the length of the uncompressed text for a given revision"""

435

"""return the length of the uncompressed text for a given revision"""

434

l = self.index[rev][2]

436

l = self.index[rev][2]

435

if l >= 0:

437

if l >= 0:

436

return l

438

return l

437

439

438

t = self.revision(self.node(rev))

440

t = self.revision(self.node(rev))

439

return len(t)

441

return len(t)

440

size = rawsize

442

size = rawsize

441

443

442

def chainbase(self, rev):

444

def chainbase(self, rev):

443

base = self._chainbasecache.get(rev)

445

base = self._chainbasecache.get(rev)

444

if base is not None:

446

if base is not None:

445

return base

447

return base

446

448

447

index = self.index

449

index = self.index

448

base = index[rev][3]

450

base = index[rev][3]

449

while base != rev:

451

while base != rev:

450

rev = base

452

rev = base

451

base = index[rev][3]

453

base = index[rev][3]

452

454

453

self._chainbasecache[rev] = base

455

self._chainbasecache[rev] = base

454

return base

456

return base

455

457

456

def linkrev(self, rev):

458

def linkrev(self, rev):

457

return self.index[rev][4]

459

return self.index[rev][4]

458

460

459

def parentrevs(self, rev):

461

def parentrevs(self, rev):

460

return self.index[rev][5:7]

462

return self.index[rev][5:7]

461

463

462

def node(self, rev):

464

def node(self, rev):

463

return self.index[rev][7]

465

return self.index[rev][7]

464

466

465

# Derived from index values.

467

# Derived from index values.

466

468

467

def end(self, rev):

469

def end(self, rev):

468

return self.start(rev) + self.length(rev)

470

return self.start(rev) + self.length(rev)

469

471

470

def parents(self, node):

472

def parents(self, node):

471

i = self.index

473

i = self.index

472

d = i[self.rev(node)]

474

d = i[self.rev(node)]

473

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

475

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

474

476

475

def chainlen(self, rev):

477

def chainlen(self, rev):

476

return self._chaininfo(rev)[0]

478

return self._chaininfo(rev)[0]

477

479

478

def _chaininfo(self, rev):

480

def _chaininfo(self, rev):

479

chaininfocache = self._chaininfocache

481

chaininfocache = self._chaininfocache

480

if rev in chaininfocache:

482

if rev in chaininfocache:

481

return chaininfocache[rev]

483

return chaininfocache[rev]

482

index = self.index

484

index = self.index

483

generaldelta = self._generaldelta

485

generaldelta = self._generaldelta

484

iterrev = rev

486

iterrev = rev

485

e = index[iterrev]

487

e = index[iterrev]

486

clen = 0

488

clen = 0

487

compresseddeltalen = 0

489

compresseddeltalen = 0

488

while iterrev != e[3]:

490

while iterrev != e[3]:

489

clen += 1

491

clen += 1

490

compresseddeltalen += e[1]

492

compresseddeltalen += e[1]

491

if generaldelta:

493

if generaldelta:

492

iterrev = e[3]

494

iterrev = e[3]

493

else:

495

else:

494

iterrev -= 1

496

iterrev -= 1

495

if iterrev in chaininfocache:

497

if iterrev in chaininfocache:

496

t = chaininfocache[iterrev]

498

t = chaininfocache[iterrev]

497

clen += t[0]

499

clen += t[0]

498

compresseddeltalen += t[1]

500

compresseddeltalen += t[1]

499

break

501

break

500

e = index[iterrev]

502

e = index[iterrev]

501

else:

503

else:

502

# Add text length of base since decompressing that also takes

504

# Add text length of base since decompressing that also takes

503

# work. For cache hits the length is already included.

505

# work. For cache hits the length is already included.

504

compresseddeltalen += e[1]

506

compresseddeltalen += e[1]

505

r = (clen, compresseddeltalen)

507

r = (clen, compresseddeltalen)

506

chaininfocache[rev] = r

508

chaininfocache[rev] = r

507

return r

509

return r

508

510

509

def _deltachain(self, rev, stoprev=None):

511

def _deltachain(self, rev, stoprev=None):

510

"""Obtain the delta chain for a revision.

512

"""Obtain the delta chain for a revision.

511

513

512

``stoprev`` specifies a revision to stop at. If not specified, we

514

``stoprev`` specifies a revision to stop at. If not specified, we

513

stop at the base of the chain.

515

stop at the base of the chain.

514

516

515

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

517

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

516

revs in ascending order and ``stopped`` is a bool indicating whether

518

revs in ascending order and ``stopped`` is a bool indicating whether

517

``stoprev`` was hit.

519

``stoprev`` was hit.

518

"""

520

"""

519

chain = []

521

chain = []

520

522

521

# Alias to prevent attribute lookup in tight loop.

523

# Alias to prevent attribute lookup in tight loop.

522

index = self.index

524

index = self.index

523

generaldelta = self._generaldelta

525

generaldelta = self._generaldelta

524

526

525

iterrev = rev

527

iterrev = rev

526

e = index[iterrev]

528

e = index[iterrev]

527

while iterrev != e[3] and iterrev != stoprev:

529

while iterrev != e[3] and iterrev != stoprev:

528

chain.append(iterrev)

530

chain.append(iterrev)

529

if generaldelta:

531

if generaldelta:

530

iterrev = e[3]

532

iterrev = e[3]

531

else:

533

else:

532

iterrev -= 1

534

iterrev -= 1

533

e = index[iterrev]

535

e = index[iterrev]

534

536

535

if iterrev == stoprev:

537

if iterrev == stoprev:

536

stopped = True

538

stopped = True

537

else:

539

else:

538

chain.append(iterrev)

540

chain.append(iterrev)

539

stopped = False

541

stopped = False

540

542

541

chain.reverse()

543

chain.reverse()

542

return chain, stopped

544

return chain, stopped

543

545

544

def ancestors(self, revs, stoprev=0, inclusive=False):

546

def ancestors(self, revs, stoprev=0, inclusive=False):

545

"""Generate the ancestors of 'revs' in reverse topological order.

547

"""Generate the ancestors of 'revs' in reverse topological order.

546

Does not generate revs lower than stoprev.

548

Does not generate revs lower than stoprev.

547

549

548

See the documentation for ancestor.lazyancestors for more details."""

550

See the documentation for ancestor.lazyancestors for more details."""

549

551

550

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

552

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

551

inclusive=inclusive)

553

inclusive=inclusive)

552

554

553

def descendants(self, revs):

555

def descendants(self, revs):

554

"""Generate the descendants of 'revs' in revision order.

556

"""Generate the descendants of 'revs' in revision order.

555

557

556

Yield a sequence of revision numbers starting with a child of

558

Yield a sequence of revision numbers starting with a child of

557

some rev in revs, i.e., each revision is *not* considered a

559

some rev in revs, i.e., each revision is *not* considered a

558

descendant of itself. Results are ordered by revision number (a

560

descendant of itself. Results are ordered by revision number (a

559

topological sort)."""

561

topological sort)."""

560

first = min(revs)

562

first = min(revs)

561

if first == nullrev:

563

if first == nullrev:

562

for i in self:

564

for i in self:

563

yield i

565

yield i

564

return

566

return

565

567

566

seen = set(revs)

568

seen = set(revs)

567

for i in self.revs(start=first + 1):

569

for i in self.revs(start=first + 1):

568

for x in self.parentrevs(i):

570

for x in self.parentrevs(i):

569

if x != nullrev and x in seen:

571

if x != nullrev and x in seen:

570

seen.add(i)

572

seen.add(i)

571

yield i

573

yield i

572

break

574

break

573

575

574

def findcommonmissing(self, common=None, heads=None):

576

def findcommonmissing(self, common=None, heads=None):

575

"""Return a tuple of the ancestors of common and the ancestors of heads

577

"""Return a tuple of the ancestors of common and the ancestors of heads

576

that are not ancestors of common. In revset terminology, we return the

578

that are not ancestors of common. In revset terminology, we return the

577

tuple:

579

tuple:

578

580

579

::common, (::heads) - (::common)

581

::common, (::heads) - (::common)

580

582

581

The list is sorted by revision number, meaning it is

583

The list is sorted by revision number, meaning it is

582

topologically sorted.

584

topologically sorted.

583

585

584

'heads' and 'common' are both lists of node IDs. If heads is

586

'heads' and 'common' are both lists of node IDs. If heads is

585

not supplied, uses all of the revlog's heads. If common is not

587

not supplied, uses all of the revlog's heads. If common is not

586

supplied, uses nullid."""

588

supplied, uses nullid."""

587

if common is None:

589

if common is None:

588

common = [nullid]

590

common = [nullid]

589

if heads is None:

591

if heads is None:

590

heads = self.heads()

592

heads = self.heads()

591

593

592

common = [self.rev(n) for n in common]

594

common = [self.rev(n) for n in common]

593

heads = [self.rev(n) for n in heads]

595

heads = [self.rev(n) for n in heads]

594

596

595

# we want the ancestors, but inclusive

597

# we want the ancestors, but inclusive

596

class lazyset(object):

598

class lazyset(object):

597

def __init__(self, lazyvalues):

599

def __init__(self, lazyvalues):

598

self.addedvalues = set()

600

self.addedvalues = set()

599

self.lazyvalues = lazyvalues

601

self.lazyvalues = lazyvalues

600

602

601

def __contains__(self, value):

603

def __contains__(self, value):

602

return value in self.addedvalues or value in self.lazyvalues

604

return value in self.addedvalues or value in self.lazyvalues

603

605

604

def __iter__(self):

606

def __iter__(self):

605

added = self.addedvalues

607

added = self.addedvalues

606

for r in added:

608

for r in added:

607

yield r

609

yield r

608

for r in self.lazyvalues:

610

for r in self.lazyvalues:

609

if not r in added:

611

if not r in added:

610

yield r

612

yield r

611

613

612

def add(self, value):

614

def add(self, value):

613

self.addedvalues.add(value)

615

self.addedvalues.add(value)

614

616

615

def update(self, values):

617

def update(self, values):

616

self.addedvalues.update(values)

618

self.addedvalues.update(values)

617

619

618

has = lazyset(self.ancestors(common))

620

has = lazyset(self.ancestors(common))

619

has.add(nullrev)

621

has.add(nullrev)

620

has.update(common)

622

has.update(common)

621

623

622

# take all ancestors from heads that aren't in has

624

# take all ancestors from heads that aren't in has

623

missing = set()

625

missing = set()

624

visit = collections.deque(r for r in heads if r not in has)

626

visit = collections.deque(r for r in heads if r not in has)

625

while visit:

627

while visit:

626

r = visit.popleft()

628

r = visit.popleft()

627

if r in missing:

629

if r in missing:

628

continue

630

continue

629

else:

631

else:

630

missing.add(r)

632

missing.add(r)

631

for p in self.parentrevs(r):

633

for p in self.parentrevs(r):

632

if p not in has:

634

if p not in has:

633

visit.append(p)

635

visit.append(p)

634

missing = list(missing)

636

missing = list(missing)

635

missing.sort()

637

missing.sort()

636

return has, [self.node(miss) for miss in missing]

638

return has, [self.node(miss) for miss in missing]

637

639

638

def incrementalmissingrevs(self, common=None):

640

def incrementalmissingrevs(self, common=None):

639

"""Return an object that can be used to incrementally compute the

641

"""Return an object that can be used to incrementally compute the

640

revision numbers of the ancestors of arbitrary sets that are not

642

revision numbers of the ancestors of arbitrary sets that are not

641

ancestors of common. This is an ancestor.incrementalmissingancestors

643

ancestors of common. This is an ancestor.incrementalmissingancestors

642

object.

644

object.

643

645

644

'common' is a list of revision numbers. If common is not supplied, uses

646

'common' is a list of revision numbers. If common is not supplied, uses

645

nullrev.

647

nullrev.

646

"""

648

"""

647

if common is None:

649

if common is None:

648

common = [nullrev]

650

common = [nullrev]

649

651

650

return ancestor.incrementalmissingancestors(self.parentrevs, common)

652

return ancestor.incrementalmissingancestors(self.parentrevs, common)

651

653

652

def findmissingrevs(self, common=None, heads=None):

654

def findmissingrevs(self, common=None, heads=None):

653

"""Return the revision numbers of the ancestors of heads that

655

"""Return the revision numbers of the ancestors of heads that

654

are not ancestors of common.

656

are not ancestors of common.

655

657

656

More specifically, return a list of revision numbers corresponding to

658

More specifically, return a list of revision numbers corresponding to

657

nodes N such that every N satisfies the following constraints:

659

nodes N such that every N satisfies the following constraints:

658

660

659

1. N is an ancestor of some node in 'heads'

661

1. N is an ancestor of some node in 'heads'

660

2. N is not an ancestor of any node in 'common'

662

2. N is not an ancestor of any node in 'common'

661

663

662

The list is sorted by revision number, meaning it is

664

The list is sorted by revision number, meaning it is

663

topologically sorted.

665

topologically sorted.

664

666

665

'heads' and 'common' are both lists of revision numbers. If heads is

667

'heads' and 'common' are both lists of revision numbers. If heads is

666

not supplied, uses all of the revlog's heads. If common is not

668

not supplied, uses all of the revlog's heads. If common is not

667

supplied, uses nullid."""

669

supplied, uses nullid."""

668

if common is None:

670

if common is None:

669

common = [nullrev]

671

common = [nullrev]

670

if heads is None:

672

if heads is None:

671

heads = self.headrevs()

673

heads = self.headrevs()

672

674

673

inc = self.incrementalmissingrevs(common=common)

675

inc = self.incrementalmissingrevs(common=common)

674

return inc.missingancestors(heads)

676

return inc.missingancestors(heads)

675

677

676

def findmissing(self, common=None, heads=None):

678

def findmissing(self, common=None, heads=None):

677

"""Return the ancestors of heads that are not ancestors of common.

679

"""Return the ancestors of heads that are not ancestors of common.

678

680

679

More specifically, return a list of nodes N such that every N

681

More specifically, return a list of nodes N such that every N

680

satisfies the following constraints:

682

satisfies the following constraints:

681

683

682

1. N is an ancestor of some node in 'heads'

684

1. N is an ancestor of some node in 'heads'

683

2. N is not an ancestor of any node in 'common'

685

2. N is not an ancestor of any node in 'common'

684

686

685

The list is sorted by revision number, meaning it is

687

The list is sorted by revision number, meaning it is

686

topologically sorted.

688

topologically sorted.

687

689

688

'heads' and 'common' are both lists of node IDs. If heads is

690

'heads' and 'common' are both lists of node IDs. If heads is

689

not supplied, uses all of the revlog's heads. If common is not

691

not supplied, uses all of the revlog's heads. If common is not

690

supplied, uses nullid."""

692

supplied, uses nullid."""

691

if common is None:

693

if common is None:

692

common = [nullid]

694

common = [nullid]

693

if heads is None:

695

if heads is None:

694

heads = self.heads()

696

heads = self.heads()

695

697

696

common = [self.rev(n) for n in common]

698

common = [self.rev(n) for n in common]

697

heads = [self.rev(n) for n in heads]

699

heads = [self.rev(n) for n in heads]

698

700

699

inc = self.incrementalmissingrevs(common=common)

701

inc = self.incrementalmissingrevs(common=common)

700

return [self.node(r) for r in inc.missingancestors(heads)]

702

return [self.node(r) for r in inc.missingancestors(heads)]

701

703

702

def nodesbetween(self, roots=None, heads=None):

704

def nodesbetween(self, roots=None, heads=None):

703

"""Return a topological path from 'roots' to 'heads'.

705

"""Return a topological path from 'roots' to 'heads'.

704

706

705

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

707

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

706

topologically sorted list of all nodes N that satisfy both of

708

topologically sorted list of all nodes N that satisfy both of

707

these constraints:

709

these constraints:

708

710

709

1. N is a descendant of some node in 'roots'

711

1. N is a descendant of some node in 'roots'

710

2. N is an ancestor of some node in 'heads'

712

2. N is an ancestor of some node in 'heads'

711

713

712

Every node is considered to be both a descendant and an ancestor

714

Every node is considered to be both a descendant and an ancestor

713

of itself, so every reachable node in 'roots' and 'heads' will be

715

of itself, so every reachable node in 'roots' and 'heads' will be

714

included in 'nodes'.

716

included in 'nodes'.

715

717

716

'outroots' is the list of reachable nodes in 'roots', i.e., the

718

'outroots' is the list of reachable nodes in 'roots', i.e., the

717

subset of 'roots' that is returned in 'nodes'. Likewise,

719

subset of 'roots' that is returned in 'nodes'. Likewise,

718

'outheads' is the subset of 'heads' that is also in 'nodes'.

720

'outheads' is the subset of 'heads' that is also in 'nodes'.

719

721

720

'roots' and 'heads' are both lists of node IDs. If 'roots' is

722

'roots' and 'heads' are both lists of node IDs. If 'roots' is

721

unspecified, uses nullid as the only root. If 'heads' is

723

unspecified, uses nullid as the only root. If 'heads' is

722

unspecified, uses list of all of the revlog's heads."""

724

unspecified, uses list of all of the revlog's heads."""

723

nonodes = ([], [], [])

725

nonodes = ([], [], [])

724

if roots is not None:

726

if roots is not None:

725

roots = list(roots)

727

roots = list(roots)

726

if not roots:

728

if not roots:

727

return nonodes

729

return nonodes

728

lowestrev = min([self.rev(n) for n in roots])

730

lowestrev = min([self.rev(n) for n in roots])

729

else:

731

else:

730

roots = [nullid] # Everybody's a descendant of nullid

732

roots = [nullid] # Everybody's a descendant of nullid

731

lowestrev = nullrev

733

lowestrev = nullrev

732

if (lowestrev == nullrev) and (heads is None):

734

if (lowestrev == nullrev) and (heads is None):

733

# We want _all_ the nodes!

735

# We want _all_ the nodes!

734

return ([self.node(r) for r in self], [nullid], list(self.heads()))

736

return ([self.node(r) for r in self], [nullid], list(self.heads()))

735

if heads is None:

737

if heads is None:

736

# All nodes are ancestors, so the latest ancestor is the last

738

# All nodes are ancestors, so the latest ancestor is the last

737

# node.

739

# node.

738

highestrev = len(self) - 1

740

highestrev = len(self) - 1

739

# Set ancestors to None to signal that every node is an ancestor.

741

# Set ancestors to None to signal that every node is an ancestor.

740

ancestors = None

742

ancestors = None

741

# Set heads to an empty dictionary for later discovery of heads

743

# Set heads to an empty dictionary for later discovery of heads

742

heads = {}

744

heads = {}

743

else:

745

else:

744

heads = list(heads)

746

heads = list(heads)

745

if not heads:

747

if not heads:

746

return nonodes

748

return nonodes

747

ancestors = set()

749

ancestors = set()

748

# Turn heads into a dictionary so we can remove 'fake' heads.

750

# Turn heads into a dictionary so we can remove 'fake' heads.

749

# Also, later we will be using it to filter out the heads we can't

751

# Also, later we will be using it to filter out the heads we can't

750

# find from roots.

752

# find from roots.

751

heads = dict.fromkeys(heads, False)

753

heads = dict.fromkeys(heads, False)

752

# Start at the top and keep marking parents until we're done.

754

# Start at the top and keep marking parents until we're done.

753

nodestotag = set(heads)

755

nodestotag = set(heads)

754

# Remember where the top was so we can use it as a limit later.

756

# Remember where the top was so we can use it as a limit later.

755

highestrev = max([self.rev(n) for n in nodestotag])

757

highestrev = max([self.rev(n) for n in nodestotag])

756

while nodestotag:

758

while nodestotag:

757

# grab a node to tag

759

# grab a node to tag

758

n = nodestotag.pop()

760

n = nodestotag.pop()

759

# Never tag nullid

761

# Never tag nullid

760

if n == nullid:

762

if n == nullid:

761

continue

763

continue

762

# A node's revision number represents its place in a

764

# A node's revision number represents its place in a

763

# topologically sorted list of nodes.

765

# topologically sorted list of nodes.

764

r = self.rev(n)

766

r = self.rev(n)

765

if r >= lowestrev:

767

if r >= lowestrev:

766

if n not in ancestors:

768

if n not in ancestors:

767

# If we are possibly a descendant of one of the roots

769

# If we are possibly a descendant of one of the roots

768

# and we haven't already been marked as an ancestor

770

# and we haven't already been marked as an ancestor

769

ancestors.add(n) # Mark as ancestor

771

ancestors.add(n) # Mark as ancestor

770

# Add non-nullid parents to list of nodes to tag.

772

# Add non-nullid parents to list of nodes to tag.

771

nodestotag.update([p for p in self.parents(n) if

773

nodestotag.update([p for p in self.parents(n) if

772

p != nullid])

774

p != nullid])

773

elif n in heads: # We've seen it before, is it a fake head?

775

elif n in heads: # We've seen it before, is it a fake head?

774

# So it is, real heads should not be the ancestors of

776

# So it is, real heads should not be the ancestors of

775

# any other heads.

777

# any other heads.

776

heads.pop(n)

778

heads.pop(n)

777

if not ancestors:

779

if not ancestors:

778

return nonodes

780

return nonodes

779

# Now that we have our set of ancestors, we want to remove any

781

# Now that we have our set of ancestors, we want to remove any

780

# roots that are not ancestors.

782

# roots that are not ancestors.

781

783

782

# If one of the roots was nullid, everything is included anyway.

784

# If one of the roots was nullid, everything is included anyway.

783

if lowestrev > nullrev:

785

if lowestrev > nullrev:

784

# But, since we weren't, let's recompute the lowest rev to not

786

# But, since we weren't, let's recompute the lowest rev to not

785

# include roots that aren't ancestors.

787

# include roots that aren't ancestors.

786

788

787

# Filter out roots that aren't ancestors of heads

789

# Filter out roots that aren't ancestors of heads

788

roots = [root for root in roots if root in ancestors]

790

roots = [root for root in roots if root in ancestors]

789

# Recompute the lowest revision

791

# Recompute the lowest revision

790

if roots:

792

if roots:

791

lowestrev = min([self.rev(root) for root in roots])

793

lowestrev = min([self.rev(root) for root in roots])

792

else:

794

else:

793

# No more roots? Return empty list

795

# No more roots? Return empty list

794

return nonodes

796

return nonodes

795

else:

797

else:

796

# We are descending from nullid, and don't need to care about

798

# We are descending from nullid, and don't need to care about

797

# any other roots.

799

# any other roots.

798

lowestrev = nullrev

800

lowestrev = nullrev

799

roots = [nullid]

801

roots = [nullid]

800

# Transform our roots list into a set.

802

# Transform our roots list into a set.

801

descendants = set(roots)

803

descendants = set(roots)

802

# Also, keep the original roots so we can filter out roots that aren't

804

# Also, keep the original roots so we can filter out roots that aren't

803

# 'real' roots (i.e. are descended from other roots).

805

# 'real' roots (i.e. are descended from other roots).

804

roots = descendants.copy()

806

roots = descendants.copy()

805

# Our topologically sorted list of output nodes.

807

# Our topologically sorted list of output nodes.

806

orderedout = []

808

orderedout = []

807

# Don't start at nullid since we don't want nullid in our output list,

809

# Don't start at nullid since we don't want nullid in our output list,

808

# and if nullid shows up in descendants, empty parents will look like

810

# and if nullid shows up in descendants, empty parents will look like

809

# they're descendants.

811

# they're descendants.

810

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

812

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

811

n = self.node(r)

813

n = self.node(r)

812

isdescendant = False

814

isdescendant = False

813

if lowestrev == nullrev: # Everybody is a descendant of nullid

815

if lowestrev == nullrev: # Everybody is a descendant of nullid

814

isdescendant = True

816

isdescendant = True

815

elif n in descendants:

817

elif n in descendants:

816

# n is already a descendant

818

# n is already a descendant

817

isdescendant = True

819

isdescendant = True

818

# This check only needs to be done here because all the roots

820

# This check only needs to be done here because all the roots

819

# will start being marked is descendants before the loop.

821

# will start being marked is descendants before the loop.

820

if n in roots:

822

if n in roots:

821

# If n was a root, check if it's a 'real' root.

823

# If n was a root, check if it's a 'real' root.

822

p = tuple(self.parents(n))

824

p = tuple(self.parents(n))

823

# If any of its parents are descendants, it's not a root.

825

# If any of its parents are descendants, it's not a root.

824

if (p[0] in descendants) or (p[1] in descendants):

826

if (p[0] in descendants) or (p[1] in descendants):

825

roots.remove(n)

827

roots.remove(n)

826

else:

828

else:

827

p = tuple(self.parents(n))

829

p = tuple(self.parents(n))

828

# A node is a descendant if either of its parents are

830

# A node is a descendant if either of its parents are

829

# descendants. (We seeded the dependents list with the roots

831

# descendants. (We seeded the dependents list with the roots

830

# up there, remember?)

832

# up there, remember?)

831

if (p[0] in descendants) or (p[1] in descendants):

833

if (p[0] in descendants) or (p[1] in descendants):

832

descendants.add(n)

834

descendants.add(n)

833

isdescendant = True

835

isdescendant = True

834

if isdescendant and ((ancestors is None) or (n in ancestors)):

836

if isdescendant and ((ancestors is None) or (n in ancestors)):

835

# Only include nodes that are both descendants and ancestors.

837

# Only include nodes that are both descendants and ancestors.

836

orderedout.append(n)

838

orderedout.append(n)

837

if (ancestors is not None) and (n in heads):

839

if (ancestors is not None) and (n in heads):

838

# We're trying to figure out which heads are reachable

840

# We're trying to figure out which heads are reachable

839

# from roots.

841

# from roots.

840

# Mark this head as having been reached

842

# Mark this head as having been reached

841

heads[n] = True

843

heads[n] = True

842

elif ancestors is None:

844

elif ancestors is None:

843

# Otherwise, we're trying to discover the heads.

845

# Otherwise, we're trying to discover the heads.

844

# Assume this is a head because if it isn't, the next step

846

# Assume this is a head because if it isn't, the next step

845

# will eventually remove it.

847

# will eventually remove it.

846

heads[n] = True

848

heads[n] = True

847

# But, obviously its parents aren't.

849

# But, obviously its parents aren't.

848

for p in self.parents(n):

850

for p in self.parents(n):

849

heads.pop(p, None)

851

heads.pop(p, None)

850

heads = [head for head, flag in heads.iteritems() if flag]

852

heads = [head for head, flag in heads.iteritems() if flag]

851

roots = list(roots)

853

roots = list(roots)

852

assert orderedout

854

assert orderedout

853

assert roots

855

assert roots

854

assert heads

856

assert heads

855

return (orderedout, roots, heads)

857

return (orderedout, roots, heads)

856

858

857

def headrevs(self):

859

def headrevs(self):

858

try:

860

try:

859

return self.index.headrevs()

861

return self.index.headrevs()

860

except AttributeError:

862

except AttributeError:

861

return self._headrevs()

863

return self._headrevs()

862

864

863

def computephases(self, roots):

865

def computephases(self, roots):

864

return self.index.computephasesmapsets(roots)

866

return self.index.computephasesmapsets(roots)

865

867

866

def _headrevs(self):

868

def _headrevs(self):

867

count = len(self)

869

count = len(self)

868

if not count:

870

if not count:

869

return [nullrev]

871

return [nullrev]

870

# we won't iter over filtered rev so nobody is a head at start

872

# we won't iter over filtered rev so nobody is a head at start

871

ishead = [0] * (count + 1)

873

ishead = [0] * (count + 1)

872

index = self.index

874

index = self.index

873

for r in self:

875

for r in self:

874

ishead[r] = 1 # I may be an head

876

ishead[r] = 1 # I may be an head

875

e = index[r]

877

e = index[r]

876

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

878

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

877

return [r for r, val in enumerate(ishead) if val]

879

return [r for r, val in enumerate(ishead) if val]

878

880

879

def heads(self, start=None, stop=None):

881

def heads(self, start=None, stop=None):

880

"""return the list of all nodes that have no children

882

"""return the list of all nodes that have no children

881

883

882

if start is specified, only heads that are descendants of

884

if start is specified, only heads that are descendants of

883

start will be returned

885

start will be returned

884

if stop is specified, it will consider all the revs from stop

886

if stop is specified, it will consider all the revs from stop

885

as if they had no children

887

as if they had no children

886

"""

888

"""

887

if start is None and stop is None:

889

if start is None and stop is None:

888

if not len(self):

890

if not len(self):

889

return [nullid]

891

return [nullid]

890

return [self.node(r) for r in self.headrevs()]

892

return [self.node(r) for r in self.headrevs()]

891

893

892

if start is None:

894

if start is None:

893

start = nullid

895

start = nullid

894

if stop is None:

896

if stop is None:

895

stop = []

897

stop = []

896

stoprevs = set([self.rev(n) for n in stop])

898

stoprevs = set([self.rev(n) for n in stop])

897

startrev = self.rev(start)

899

startrev = self.rev(start)

898

reachable = set((startrev,))

900

reachable = set((startrev,))

899

heads = set((startrev,))

901

heads = set((startrev,))

900

902

901

parentrevs = self.parentrevs

903

parentrevs = self.parentrevs

902

for r in self.revs(start=startrev + 1):

904

for r in self.revs(start=startrev + 1):

903

for p in parentrevs(r):

905

for p in parentrevs(r):

904

if p in reachable:

906

if p in reachable:

905

if r not in stoprevs:

907

if r not in stoprevs:

906

reachable.add(r)

908

reachable.add(r)

907

heads.add(r)

909

heads.add(r)

908

if p in heads and p not in stoprevs:

910

if p in heads and p not in stoprevs:

909

heads.remove(p)

911

heads.remove(p)

910

912

911

return [self.node(r) for r in heads]

913

return [self.node(r) for r in heads]

912

914

913

def children(self, node):

915

def children(self, node):

914

"""find the children of a given node"""

916

"""find the children of a given node"""

915

c = []

917

c = []

916

p = self.rev(node)

918

p = self.rev(node)

917

for r in self.revs(start=p + 1):

919

for r in self.revs(start=p + 1):

918

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

920

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

919

if prevs:

921

if prevs:

920

for pr in prevs:

922

for pr in prevs:

921

if pr == p:

923

if pr == p:

922

c.append(self.node(r))

924

c.append(self.node(r))

923

elif p == nullrev:

925

elif p == nullrev:

924

c.append(self.node(r))

926

c.append(self.node(r))

925

return c

927

return c

926

928

927

def descendant(self, start, end):

929

def descendant(self, start, end):

928

if start == nullrev:

930

if start == nullrev:

929

return True

931

return True

930

for i in self.descendants([start]):

932

for i in self.descendants([start]):

931

if i == end:

933

if i == end:

932

return True

934

return True

933

elif i > end:

935

elif i > end:

934

break

936

break

935

return False

937

return False

936

938

937

def commonancestorsheads(self, a, b):

939

def commonancestorsheads(self, a, b):

938

"""calculate all the heads of the common ancestors of nodes a and b"""

940

"""calculate all the heads of the common ancestors of nodes a and b"""

939

a, b = self.rev(a), self.rev(b)

941

a, b = self.rev(a), self.rev(b)

940

try:

942

try:

941

ancs = self.index.commonancestorsheads(a, b)

943

ancs = self.index.commonancestorsheads(a, b)

942

except (AttributeError, OverflowError): # C implementation failed

944

except (AttributeError, OverflowError): # C implementation failed

943

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

945

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

944

return map(self.node, ancs)

946

return map(self.node, ancs)

945

947

946

def isancestor(self, a, b):

948

def isancestor(self, a, b):

947

"""return True if node a is an ancestor of node b

949

"""return True if node a is an ancestor of node b

948

950

949

The implementation of this is trivial but the use of

951

The implementation of this is trivial but the use of

950

commonancestorsheads is not."""

952

commonancestorsheads is not."""

951

return a in self.commonancestorsheads(a, b)

953

return a in self.commonancestorsheads(a, b)

952

954

953

def ancestor(self, a, b):

955

def ancestor(self, a, b):

954

"""calculate the "best" common ancestor of nodes a and b"""

956

"""calculate the "best" common ancestor of nodes a and b"""

955

957

956

a, b = self.rev(a), self.rev(b)

958

a, b = self.rev(a), self.rev(b)

957

try:

959

try:

958

ancs = self.index.ancestors(a, b)

960

ancs = self.index.ancestors(a, b)

959

except (AttributeError, OverflowError):

961

except (AttributeError, OverflowError):

960

ancs = ancestor.ancestors(self.parentrevs, a, b)

962

ancs = ancestor.ancestors(self.parentrevs, a, b)

961

if ancs:

963

if ancs:

962

# choose a consistent winner when there's a tie

964

# choose a consistent winner when there's a tie

963

return min(map(self.node, ancs))

965

return min(map(self.node, ancs))

964

return nullid

966

return nullid

965

967

966

def _match(self, id):

968

def _match(self, id):

967

if isinstance(id, int):

969

if isinstance(id, int):

968

# rev

970

# rev

969

return self.node(id)

971

return self.node(id)

970

if len(id) == 20:

972

if len(id) == 20:

971

# possibly a binary node

973

# possibly a binary node

972

# odds of a binary node being all hex in ASCII are 1 in 10**25

974

# odds of a binary node being all hex in ASCII are 1 in 10**25

973

try:

975

try:

974

node = id

976

node = id

975

self.rev(node) # quick search the index

977

self.rev(node) # quick search the index

976

return node

978

return node

977

except LookupError:

979

except LookupError:

978

pass # may be partial hex id

980

pass # may be partial hex id

979

try:

981

try:

980

# str(rev)

982

# str(rev)

981

rev = int(id)

983

rev = int(id)

982

if str(rev) != id:

984

if str(rev) != id:

983

raise ValueError

985

raise ValueError

984

if rev < 0:

986

if rev < 0:

985

rev = len(self) + rev

987

rev = len(self) + rev

986

if rev < 0 or rev >= len(self):

988

if rev < 0 or rev >= len(self):

987

raise ValueError

989

raise ValueError

988

return self.node(rev)

990

return self.node(rev)

989

except (ValueError, OverflowError):

991

except (ValueError, OverflowError):

990

pass

992

pass

991

if len(id) == 40:

993

if len(id) == 40:

992

try:

994

try:

993

# a full hex nodeid?

995

# a full hex nodeid?

994

node = bin(id)

996

node = bin(id)

995

self.rev(node)

997

self.rev(node)

996

return node

998

return node

997

except (TypeError, LookupError):

999

except (TypeError, LookupError):

998

pass

1000

pass

999

1001

1000

def _partialmatch(self, id):

1002

def _partialmatch(self, id):

1001

try:

1003

try:

1002

partial = self.index.partialmatch(id)

1004

partial = self.index.partialmatch(id)

1003

if partial and self.hasnode(partial):

1005

if partial and self.hasnode(partial):

1004

return partial

1006

return partial

1005

return None

1007

return None

1006

except RevlogError:

1008

except RevlogError:

1007

# parsers.c radix tree lookup gave multiple matches

1009

# parsers.c radix tree lookup gave multiple matches

1008

# fast path: for unfiltered changelog, radix tree is accurate

1010

# fast path: for unfiltered changelog, radix tree is accurate

1009

if not getattr(self, 'filteredrevs', None):

1011

if not getattr(self, 'filteredrevs', None):

1010

raise LookupError(id, self.indexfile,

1012

raise LookupError(id, self.indexfile,

1011

_('ambiguous identifier'))

1013

_('ambiguous identifier'))

1012

# fall through to slow path that filters hidden revisions

1014

# fall through to slow path that filters hidden revisions

1013

except (AttributeError, ValueError):

1015

except (AttributeError, ValueError):

1014

# we are pure python, or key was too short to search radix tree

1016

# we are pure python, or key was too short to search radix tree

1015

pass

1017

pass

1016

1018

1017

if id in self._pcache:

1019

if id in self._pcache:

1018

return self._pcache[id]

1020

return self._pcache[id]

1019

1021

1020

if len(id) < 40:

1022

if len(id) < 40:

1021

try:

1023

try:

1022

# hex(node)[:...]

1024

# hex(node)[:...]

1023

l = len(id) // 2 # grab an even number of digits

1025

l = len(id) // 2 # grab an even number of digits

1024

prefix = bin(id[:l * 2])

1026

prefix = bin(id[:l * 2])

1025

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1027

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1026

nl = [n for n in nl if hex(n).startswith(id) and

1028

nl = [n for n in nl if hex(n).startswith(id) and

1027

self.hasnode(n)]

1029

self.hasnode(n)]

1028

if len(nl) > 0:

1030

if len(nl) > 0:

1029

if len(nl) == 1:

1031

if len(nl) == 1:

1030

self._pcache[id] = nl[0]

1032

self._pcache[id] = nl[0]

1031

return nl[0]

1033

return nl[0]

1032

raise LookupError(id, self.indexfile,

1034

raise LookupError(id, self.indexfile,

1033

_('ambiguous identifier'))

1035

_('ambiguous identifier'))

1034

return None

1036

return None

1035

except TypeError:

1037

except TypeError:

1036

pass

1038

pass

1037

1039

1038

def lookup(self, id):

1040

def lookup(self, id):

1039

"""locate a node based on:

1041

"""locate a node based on:

1040

- revision number or str(revision number)

1042

- revision number or str(revision number)

1041

- nodeid or subset of hex nodeid

1043

- nodeid or subset of hex nodeid

1042

"""

1044

"""

1043

n = self._match(id)

1045

n = self._match(id)

1044

if n is not None:

1046

if n is not None:

1045

return n

1047

return n

1046

n = self._partialmatch(id)

1048

n = self._partialmatch(id)

1047

if n:

1049

if n:

1048

return n

1050

return n

1049

1051

1050

raise LookupError(id, self.indexfile, _('no match found'))

1052

raise LookupError(id, self.indexfile, _('no match found'))

1051

1053

1052

def cmp(self, node, text):

1054

def cmp(self, node, text):

1053

"""compare text with a given file revision

1055

"""compare text with a given file revision

1054

1056

1055

returns True if text is different than what is stored.

1057

returns True if text is different than what is stored.

1056

"""

1058

"""

1057

p1, p2 = self.parents(node)

1059

p1, p2 = self.parents(node)

1058

return hash(text, p1, p2) != node

1060

return hash(text, p1, p2) != node

1059

1061

1060

def _addchunk(self, offset, data):

1062

def _addchunk(self, offset, data):

1061

"""Add a segment to the revlog cache.

1063

"""Add a segment to the revlog cache.

1062

1064

1063

Accepts an absolute offset and the data that is at that location.

1065

Accepts an absolute offset and the data that is at that location.

1064

"""

1066

"""

1065

o, d = self._chunkcache

1067

o, d = self._chunkcache

1066

# try to add to existing cache

1068

# try to add to existing cache

1067

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1069

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1068

self._chunkcache = o, d + data

1070

self._chunkcache = o, d + data

1069

else:

1071

else:

1070

self._chunkcache = offset, data

1072

self._chunkcache = offset, data

1071

1073

1072

def _loadchunk(self, offset, length, df=None):

1074

def _loadchunk(self, offset, length, df=None):

1073

"""Load a segment of raw data from the revlog.

1075

"""Load a segment of raw data from the revlog.

1074

1076

1075

Accepts an absolute offset, length to read, and an optional existing

1077

Accepts an absolute offset, length to read, and an optional existing

1076

file handle to read from.

1078

file handle to read from.

1077

1079

1078

If an existing file handle is passed, it will be seeked and the

1080

If an existing file handle is passed, it will be seeked and the

1079

original seek position will NOT be restored.

1081

original seek position will NOT be restored.

1080

1082

1081

Returns a str or buffer of raw byte data.

1083

Returns a str or buffer of raw byte data.

1082

"""

1084

"""

1083

if df is not None:

1085

if df is not None:

1084

closehandle = False

1086

closehandle = False

1085

else:

1087

else:

1086

if self._inline:

1088

if self._inline:

1087

df = self.opener(self.indexfile)

1089

df = self.opener(self.indexfile)

1088

else:

1090

else:

1089

df = self.opener(self.datafile)

1091

df = self.opener(self.datafile)

1090

closehandle = True

1092

closehandle = True

1091

1093

1092

# Cache data both forward and backward around the requested

1094

# Cache data both forward and backward around the requested

1093

# data, in a fixed size window. This helps speed up operations

1095

# data, in a fixed size window. This helps speed up operations

1094

# involving reading the revlog backwards.

1096

# involving reading the revlog backwards.

1095

cachesize = self._chunkcachesize

1097

cachesize = self._chunkcachesize

1096

realoffset = offset & ~(cachesize - 1)

1098

realoffset = offset & ~(cachesize - 1)

1097

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1099

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1098

- realoffset)

1100

- realoffset)

1099

df.seek(realoffset)

1101

df.seek(realoffset)

1100

d = df.read(reallength)

1102

d = df.read(reallength)

1101

if closehandle:

1103

if closehandle:

1102

df.close()

1104

df.close()

1103

self._addchunk(realoffset, d)

1105

self._addchunk(realoffset, d)

1104

if offset != realoffset or reallength != length:

1106

if offset != realoffset or reallength != length:

1105

return util.buffer(d, offset - realoffset, length)

1107

return util.buffer(d, offset - realoffset, length)

1106

return d

1108

return d

1107

1109

1108

def _getchunk(self, offset, length, df=None):

1110

def _getchunk(self, offset, length, df=None):

1109

"""Obtain a segment of raw data from the revlog.

1111

"""Obtain a segment of raw data from the revlog.

1110

1112

1111

Accepts an absolute offset, length of bytes to obtain, and an

1113

Accepts an absolute offset, length of bytes to obtain, and an

1112

optional file handle to the already-opened revlog. If the file

1114

optional file handle to the already-opened revlog. If the file

1113

handle is used, it's original seek position will not be preserved.

1115

handle is used, it's original seek position will not be preserved.

1114

1116

1115

Requests for data may be returned from a cache.

1117

Requests for data may be returned from a cache.

1116

1118

1117

Returns a str or a buffer instance of raw byte data.

1119

Returns a str or a buffer instance of raw byte data.

1118

"""

1120

"""

1119

o, d = self._chunkcache

1121

o, d = self._chunkcache

1120

l = len(d)

1122

l = len(d)

1121

1123

1122

# is it in the cache?

1124

# is it in the cache?

1123

cachestart = offset - o

1125

cachestart = offset - o

1124

cacheend = cachestart + length

1126

cacheend = cachestart + length

1125

if cachestart >= 0 and cacheend <= l:

1127

if cachestart >= 0 and cacheend <= l:

1126

if cachestart == 0 and cacheend == l:

1128

if cachestart == 0 and cacheend == l:

1127

return d # avoid a copy

1129

return d # avoid a copy

1128

return util.buffer(d, cachestart, cacheend - cachestart)

1130

return util.buffer(d, cachestart, cacheend - cachestart)

1129

1131

1130

return self._loadchunk(offset, length, df=df)

1132

return self._loadchunk(offset, length, df=df)

1131

1133

1132

def _chunkraw(self, startrev, endrev, df=None):

1134

def _chunkraw(self, startrev, endrev, df=None):

1133

"""Obtain a segment of raw data corresponding to a range of revisions.

1135

"""Obtain a segment of raw data corresponding to a range of revisions.

1134

1136

1135

Accepts the start and end revisions and an optional already-open

1137

Accepts the start and end revisions and an optional already-open

1136

file handle to be used for reading. If the file handle is read, its

1138

file handle to be used for reading. If the file handle is read, its

1137

seek position will not be preserved.

1139

seek position will not be preserved.

1138

1140

1139

Requests for data may be satisfied by a cache.

1141

Requests for data may be satisfied by a cache.

1140

1142

1141

Returns a 2-tuple of (offset, data) for the requested range of

1143

Returns a 2-tuple of (offset, data) for the requested range of

1142

revisions. Offset is the integer offset from the beginning of the

1144

revisions. Offset is the integer offset from the beginning of the

1143

revlog and data is a str or buffer of the raw byte data.

1145

revlog and data is a str or buffer of the raw byte data.

1144

1146

1145

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1147

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1146

to determine where each revision's data begins and ends.

1148

to determine where each revision's data begins and ends.

1147

"""

1149

"""

1148

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1150

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1149

# (functions are expensive).

1151

# (functions are expensive).

1150

index = self.index

1152

index = self.index

1151

istart = index[startrev]

1153

istart = index[startrev]

1152

start = int(istart[0] >> 16)

1154

start = int(istart[0] >> 16)

1153

if startrev == endrev:

1155

if startrev == endrev:

1154

end = start + istart[1]

1156

end = start + istart[1]

1155

else:

1157

else:

1156

iend = index[endrev]

1158

iend = index[endrev]

1157

end = int(iend[0] >> 16) + iend[1]

1159

end = int(iend[0] >> 16) + iend[1]

1158

1160

1159

if self._inline:

1161

if self._inline:

1160

start += (startrev + 1) * self._io.size

1162

start += (startrev + 1) * self._io.size

1161

end += (endrev + 1) * self._io.size

1163

end += (endrev + 1) * self._io.size

1162

length = end - start

1164

length = end - start

1163

1165

1164

return start, self._getchunk(start, length, df=df)

1166

return start, self._getchunk(start, length, df=df)

1165

1167

1166

def _chunk(self, rev, df=None):

1168

def _chunk(self, rev, df=None):

1167

"""Obtain a single decompressed chunk for a revision.

1169

"""Obtain a single decompressed chunk for a revision.

1168

1170

1169

Accepts an integer revision and an optional already-open file handle

1171

Accepts an integer revision and an optional already-open file handle

1170

to be used for reading. If used, the seek position of the file will not

1172

to be used for reading. If used, the seek position of the file will not

1171

be preserved.

1173

be preserved.

1172

1174

1173

Returns a str holding uncompressed data for the requested revision.

1175

Returns a str holding uncompressed data for the requested revision.

1174

"""

1176

"""

1175

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1177

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1176

1178

1177

def _chunks(self, revs, df=None):

1179

def _chunks(self, revs, df=None):

1178

"""Obtain decompressed chunks for the specified revisions.

1180

"""Obtain decompressed chunks for the specified revisions.

1179

1181

1180

Accepts an iterable of numeric revisions that are assumed to be in

1182

Accepts an iterable of numeric revisions that are assumed to be in

1181

ascending order. Also accepts an optional already-open file handle

1183

ascending order. Also accepts an optional already-open file handle

1182

to be used for reading. If used, the seek position of the file will

1184

to be used for reading. If used, the seek position of the file will

1183

not be preserved.

1185

not be preserved.

1184

1186

1185

This function is similar to calling ``self._chunk()`` multiple times,

1187

This function is similar to calling ``self._chunk()`` multiple times,

1186

but is faster.

1188

but is faster.

1187

1189

1188

Returns a list with decompressed data for each requested revision.

1190

Returns a list with decompressed data for each requested revision.

1189

"""

1191

"""

1190

if not revs:

1192

if not revs:

1191

return []

1193

return []

1192

start = self.start

1194

start = self.start

1193

length = self.length

1195

length = self.length

1194

inline = self._inline

1196

inline = self._inline

1195

iosize = self._io.size

1197

iosize = self._io.size

1196

buffer = util.buffer

1198

buffer = util.buffer

1197

1199

1198

l = []

1200

l = []

1199

ladd = l.append

1201

ladd = l.append

1200

1202

1201

try:

1203

try:

1202

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1204

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1203

except OverflowError:

1205

except OverflowError:

1204

# issue4215 - we can't cache a run of chunks greater than

1206

# issue4215 - we can't cache a run of chunks greater than

1205

# 2G on Windows

1207

# 2G on Windows

1206

return [self._chunk(rev, df=df) for rev in revs]

1208

return [self._chunk(rev, df=df) for rev in revs]

1207

1209

1208

decomp = self.decompress

1210

decomp = self.decompress

1209

for rev in revs:

1211

for rev in revs:

1210

chunkstart = start(rev)

1212

chunkstart = start(rev)

1211

if inline:

1213

if inline:

1212

chunkstart += (rev + 1) * iosize

1214

chunkstart += (rev + 1) * iosize

1213

chunklength = length(rev)

1215

chunklength = length(rev)

1214

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1216

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1215

1217

1216

return l

1218

return l

1217

1219

1218

def _chunkclear(self):

1220

def _chunkclear(self):

1219

"""Clear the raw chunk cache."""

1221

"""Clear the raw chunk cache."""

1220

self._chunkcache = (0, '')

1222

self._chunkcache = (0, '')

1221

1223

1222

def deltaparent(self, rev):

1224

def deltaparent(self, rev):

1223

"""return deltaparent of the given revision"""

1225

"""return deltaparent of the given revision"""

1224

base = self.index[rev][3]

1226

base = self.index[rev][3]

1225

if base == rev:

1227

if base == rev:

1226

return nullrev

1228

return nullrev

1227

elif self._generaldelta:

1229

elif self._generaldelta:

1228

return base

1230

return base

1229

else:

1231

else:

1230

return rev - 1

1232

return rev - 1

1231

1233

1232

def revdiff(self, rev1, rev2):

1234

def revdiff(self, rev1, rev2):

1233

"""return or calculate a delta between two revisions"""

1235

"""return or calculate a delta between two revisions"""

1234

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1236

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1235

return str(self._chunk(rev2))

1237

return str(self._chunk(rev2))

1236

1238

1237

return mdiff.textdiff(self.revision(rev1),

1239

return mdiff.textdiff(self.revision(rev1),

1238

self.revision(rev2))

1240

self.revision(rev2))

1239

1241

1240

def revision(self, nodeorrev, _df=None, raw=False):

1242

def revision(self, nodeorrev, _df=None, raw=False):

1241

"""return an uncompressed revision of a given node or revision

1243

"""return an uncompressed revision of a given node or revision

1242

number.

1244

number.

1243

1245

1244

_df - an existing file handle to read from. (internal-only)

1246

_df - an existing file handle to read from. (internal-only)

1245

raw - an optional argument specifying if the revision data is to be

1247

raw - an optional argument specifying if the revision data is to be

1246

treated as raw data when applying flag transforms. 'raw' should be set

1248

treated as raw data when applying flag transforms. 'raw' should be set

1247

to True when generating changegroups or in debug commands.

1249

to True when generating changegroups or in debug commands.

1248

"""

1250

"""

1249

if isinstance(nodeorrev, int):

1251

if isinstance(nodeorrev, int):

1250

rev = nodeorrev

1252

rev = nodeorrev

1251

node = self.node(rev)

1253

node = self.node(rev)

1252

else:

1254

else:

1253

node = nodeorrev

1255

node = nodeorrev

1254

rev = None

1256

rev = None

1255

1257

1256

cachedrev = None

1258

cachedrev = None

1257

if node == nullid:

1259

if node == nullid:

1258

return ""

1260

return ""

1259

if self._cache:

1261

if self._cache:

1260

if self._cache[0] == node:

1262

if self._cache[0] == node:

1261

return self._cache[2]

1263

return self._cache[2]

1262

cachedrev = self._cache[1]

1264

cachedrev = self._cache[1]

1263

1265

1264

# look up what we need to read

1266

# look up what we need to read

1265

text = None

1267

text = None

1266

if rev is None:

1268

if rev is None:

1267

rev = self.rev(node)

1269

rev = self.rev(node)

1268

1270

1269

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1271

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1270

if stopped:

1272

if stopped:

1271

text = self._cache[2]

1273

text = self._cache[2]

1272

1274

1273

# drop cache to save memory

1275

# drop cache to save memory

1274

self._cache = None

1276

self._cache = None

1275

1277

1276

bins = self._chunks(chain, df=_df)

1278

bins = self._chunks(chain, df=_df)

1277

if text is None:

1279

if text is None:

1278

text = str(bins[0])

1280

text = str(bins[0])

1279

bins = bins[1:]

1281

bins = bins[1:]

1280

1282

1281

text = mdiff.patches(text, bins)

1283

text = mdiff.patches(text, bins)

1282

1284

1283

text, validatehash = self._processflags(text, self.flags(rev), 'read',

1285

text, validatehash = self._processflags(text, self.flags(rev), 'read',

1284

raw=raw)

1286

raw=raw)

1285

if validatehash:

1287

if validatehash:

1286

self.checkhash(text, node, rev=rev)

1288

self.checkhash(text, node, rev=rev)

1287

1289

1288

self._cache = (node, rev, text)

1290

self._cache = (node, rev, text)

1289

return text

1291

return text

1290

1292

1291

def hash(self, text, p1, p2):

1293

def hash(self, text, p1, p2):

1292

"""Compute a node hash.

1294

"""Compute a node hash.

1293

1295

1294

Available as a function so that subclasses can replace the hash

1296

Available as a function so that subclasses can replace the hash

1295

as needed.

1297

as needed.

1296

"""

1298

"""

1297

return hash(text, p1, p2)

1299

return hash(text, p1, p2)

1298

1300

1299

def _processflags(self, text, flags, operation, raw=False):

1301

def _processflags(self, text, flags, operation, raw=False):

1300

"""Inspect revision data flags and applies transforms defined by

1302

"""Inspect revision data flags and applies transforms defined by

1301

registered flag processors.

1303

registered flag processors.

1302

1304

1303

``text`` - the revision data to process

1305

``text`` - the revision data to process

1304

``flags`` - the revision flags

1306

``flags`` - the revision flags

1305

``operation`` - the operation being performed (read or write)

1307

``operation`` - the operation being performed (read or write)

1306

``raw`` - an optional argument describing if the raw transform should be

1308

``raw`` - an optional argument describing if the raw transform should be

1307

applied.

1309

applied.

1308

1310

1309

This method processes the flags in the order (or reverse order if

1311

This method processes the flags in the order (or reverse order if

1310

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1312

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1311

flag processors registered for present flags. The order of flags defined

1313

flag processors registered for present flags. The order of flags defined

1312

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1314

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1313

1315

1314

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1316

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1315

processed text and ``validatehash`` is a bool indicating whether the

1317

processed text and ``validatehash`` is a bool indicating whether the

1316

returned text should be checked for hash integrity.

1318

returned text should be checked for hash integrity.

1317

1319

1318

Note: If the ``raw`` argument is set, it has precedence over the

1320

Note: If the ``raw`` argument is set, it has precedence over the

1319

operation and will only update the value of ``validatehash``.

1321

operation and will only update the value of ``validatehash``.

1320

"""

1322

"""

1321

if not operation in ('read', 'write'):

1323

if not operation in ('read', 'write'):

1322

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1324

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1323

# Check all flags are known.

1325

# Check all flags are known.

1324

if flags & ~REVIDX_KNOWN_FLAGS:

1326

if flags & ~REVIDX_KNOWN_FLAGS:

1325

raise RevlogError(_("incompatible revision flag '%#x'") %

1327

raise RevlogError(_("incompatible revision flag '%#x'") %

1326

(flags & ~REVIDX_KNOWN_FLAGS))

1328

(flags & ~REVIDX_KNOWN_FLAGS))

1327

validatehash = True

1329

validatehash = True

1328

# Depending on the operation (read or write), the order might be

1330

# Depending on the operation (read or write), the order might be

1329

# reversed due to non-commutative transforms.

1331

# reversed due to non-commutative transforms.

1330

orderedflags = REVIDX_FLAGS_ORDER

1332

orderedflags = REVIDX_FLAGS_ORDER

1331

if operation == 'write':

1333

if operation == 'write':

1332

orderedflags = reversed(orderedflags)

1334

orderedflags = reversed(orderedflags)

1333

1335

1334

for flag in orderedflags:

1336

for flag in orderedflags:

1335

# If a flagprocessor has been registered for a known flag, apply the

1337

# If a flagprocessor has been registered for a known flag, apply the

1336

# related operation transform and update result tuple.

1338

# related operation transform and update result tuple.

1337

if flag & flags:

1339

if flag & flags:

1338

vhash = True

1340

vhash = True

1339

1341

1340

if flag not in _flagprocessors:

1342

if flag not in _flagprocessors:

1341

message = _("missing processor for flag '%#x'") % (flag)

1343

message = _("missing processor for flag '%#x'") % (flag)

1342

raise RevlogError(message)

1344

raise RevlogError(message)

1343

1345

1344

processor = _flagprocessors[flag]

1346

processor = _flagprocessors[flag]

1345

if processor is not None:

1347

if processor is not None:

1346

readtransform, writetransform, rawtransform = processor

1348

readtransform, writetransform, rawtransform = processor

1347

1349

1348

if raw:

1350

if raw:

1349

vhash = rawtransform(self, text)

1351

vhash = rawtransform(self, text)

1350

elif operation == 'read':

1352

elif operation == 'read':

1351

text, vhash = readtransform(self, text)

1353

text, vhash = readtransform(self, text)

1352

else: # write operation

1354

else: # write operation

1353

text, vhash = writetransform(self, text)

1355

text, vhash = writetransform(self, text)

1354

validatehash = validatehash and vhash

1356

validatehash = validatehash and vhash

1355

1357

1356

return text, validatehash

1358

return text, validatehash

1357

1359

1358

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1360

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1359

"""Check node hash integrity.

1361

"""Check node hash integrity.

1360

1362

1361

Available as a function so that subclasses can extend hash mismatch

1363

Available as a function so that subclasses can extend hash mismatch

1362

behaviors as needed.

1364

behaviors as needed.

1363

"""

1365

"""

1364

if p1 is None and p2 is None:

1366

if p1 is None and p2 is None:

1365

p1, p2 = self.parents(node)

1367

p1, p2 = self.parents(node)

1366

if node != self.hash(text, p1, p2):

1368

if node != self.hash(text, p1, p2):

1367

revornode = rev

1369

revornode = rev

1368

if revornode is None:

1370

if revornode is None:

1369

revornode = templatefilters.short(hex(node))

1371

revornode = templatefilters.short(hex(node))

1370

raise RevlogError(_("integrity check failed on %s:%s")

1372

raise RevlogError(_("integrity check failed on %s:%s")

1371

% (self.indexfile, revornode))

1373

% (self.indexfile, revornode))

1372

1374

1373

def checkinlinesize(self, tr, fp=None):

1375

def checkinlinesize(self, tr, fp=None):

1374

"""Check if the revlog is too big for inline and convert if so.

1376

"""Check if the revlog is too big for inline and convert if so.

1375

1377

1376

This should be called after revisions are added to the revlog. If the

1378

This should be called after revisions are added to the revlog. If the

1377

revlog has grown too large to be an inline revlog, it will convert it

1379

revlog has grown too large to be an inline revlog, it will convert it

1378

to use multiple index and data files.

1380

to use multiple index and data files.

1379

"""

1381

"""

1380

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1382

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1381

return

1383

return

1382

1384

1383

trinfo = tr.find(self.indexfile)

1385

trinfo = tr.find(self.indexfile)

1384

if trinfo is None:

1386

if trinfo is None:

1385

raise RevlogError(_("%s not found in the transaction")

1387

raise RevlogError(_("%s not found in the transaction")

1386

% self.indexfile)

1388

% self.indexfile)

1387

1389

1388

trindex = trinfo[2]

1390

trindex = trinfo[2]

1389

if trindex is not None:

1391

if trindex is not None:

1390

dataoff = self.start(trindex)

1392

dataoff = self.start(trindex)

1391

else:

1393

else:

1392

# revlog was stripped at start of transaction, use all leftover data

1394

# revlog was stripped at start of transaction, use all leftover data

1393

trindex = len(self) - 1

1395

trindex = len(self) - 1

1394

dataoff = self.end(-2)

1396

dataoff = self.end(-2)

1395

1397

1396

tr.add(self.datafile, dataoff)

1398

tr.add(self.datafile, dataoff)

1397

1399

1398

if fp:

1400

if fp:

1399

fp.flush()

1401

fp.flush()

1400

fp.close()

1402

fp.close()

1401

1403

1402

df = self.opener(self.datafile, 'w')

1404

df = self.opener(self.datafile, 'w')

1403

try:

1405

try:

1404

for r in self:

1406

for r in self:

1405

df.write(self._chunkraw(r, r)[1])

1407

df.write(self._chunkraw(r, r)[1])

1406

finally:

1408

finally:

1407

df.close()

1409

df.close()

1408

1410

1409

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1411

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1410

checkambig=self._checkambig)

1412

checkambig=self._checkambig)

1411

self.version &= ~(REVLOGNGINLINEDATA)

1413

self.version &= ~(REVLOGNGINLINEDATA)

1412

self._inline = False

1414

self._inline = False

1413

for i in self:

1415

for i in self:

1414

e = self._io.packentry(self.index[i], self.node, self.version, i)

1416

e = self._io.packentry(self.index[i], self.node, self.version, i)

1415

fp.write(e)

1417

fp.write(e)

1416

1418

1417

# if we don't call close, the temp file will never replace the

1419

# if we don't call close, the temp file will never replace the

1418

# real index

1420

# real index

1419

fp.close()

1421

fp.close()

1420

1422

1421

tr.replace(self.indexfile, trindex * self._io.size)

1423

tr.replace(self.indexfile, trindex * self._io.size)

1422

self._chunkclear()

1424

self._chunkclear()

1423

1425

1424

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1426

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1425

node=None, flags=REVIDX_DEFAULT_FLAGS):

1427

node=None, flags=REVIDX_DEFAULT_FLAGS):

1426

"""add a revision to the log

1428

"""add a revision to the log

1427

1429

1428

text - the revision data to add

1430

text - the revision data to add

1429

transaction - the transaction object used for rollback

1431

transaction - the transaction object used for rollback

1430

link - the linkrev data to add

1432

link - the linkrev data to add

1431

p1, p2 - the parent nodeids of the revision

1433

p1, p2 - the parent nodeids of the revision

1432

cachedelta - an optional precomputed delta

1434

cachedelta - an optional precomputed delta

1433

node - nodeid of revision; typically node is not specified, and it is

1435

node - nodeid of revision; typically node is not specified, and it is

1434

computed by default as hash(text, p1, p2), however subclasses might

1436

computed by default as hash(text, p1, p2), however subclasses might

1435

use different hashing method (and override checkhash() in such case)

1437

use different hashing method (and override checkhash() in such case)

1436

flags - the known flags to set on the revision

1438

flags - the known flags to set on the revision

1437

"""

1439

"""

1438

if link == nullrev:

1440

if link == nullrev:

1439

raise RevlogError(_("attempted to add linkrev -1 to %s")

1441

raise RevlogError(_("attempted to add linkrev -1 to %s")

1440

% self.indexfile)

1442

% self.indexfile)

1441

1443

1442

if flags:

1444

if flags:

1443

node = node or self.hash(text, p1, p2)

1445

node = node or self.hash(text, p1, p2)

1444

1446

1445

newtext, validatehash = self._processflags(text, flags, 'write')

1447

newtext, validatehash = self._processflags(text, flags, 'write')

1446

1448

1447

# If the flag processor modifies the revision data, ignore any provided

1449

# If the flag processor modifies the revision data, ignore any provided

1448

# cachedelta.

1450

# cachedelta.

1449

if newtext != text:

1451

if newtext != text:

1450

cachedelta = None

1452

cachedelta = None

1451

text = newtext

1453

text = newtext

1452

1454

1453

if len(text) > _maxentrysize:

1455

if len(text) > _maxentrysize:

1454

raise RevlogError(

1456

raise RevlogError(

1455

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1457

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1456

% (self.indexfile, len(text)))

1458

% (self.indexfile, len(text)))

1457

1459

1458

node = node or self.hash(text, p1, p2)

1460

node = node or self.hash(text, p1, p2)

1459

if node in self.nodemap:

1461

if node in self.nodemap:

1460

return node

1462

return node

1461

1463

1462

if validatehash:

1464

if validatehash:

1463

self.checkhash(text, node, p1=p1, p2=p2)

1465

self.checkhash(text, node, p1=p1, p2=p2)

1464

1466

1465

dfh = None

1467

dfh = None

1466

if not self._inline:

1468

if not self._inline:

1467

dfh = self.opener(self.datafile, "a+")

1469

dfh = self.opener(self.datafile, "a+")

1468

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1470

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1469

try:

1471

try:

1470

return self._addrevision(node, text, transaction, link, p1, p2,

1472

return self._addrevision(node, text, transaction, link, p1, p2,

1471

flags, cachedelta, ifh, dfh)

1473

flags, cachedelta, ifh, dfh)

1472

finally:

1474

finally:

1473

if dfh:

1475

if dfh:

1474

dfh.close()

1476

dfh.close()

1475

ifh.close()

1477

ifh.close()

1476

1478

1477

def compress(self, data):

1479

def compress(self, data):

1478

"""Generate a possibly-compressed representation of data."""

1480

"""Generate a possibly-compressed representation of data."""

1479

if not data:

1481

if not data:

1480

return '', data

1482

return '', data

1481

1483

1482

compressed = self._compressor.compress(data)

1484

compressed = self._compressor.compress(data)

1483

1485

1484

if compressed:

1486

if compressed:

1485

# The revlog compressor added the header in the returned data.

1487

# The revlog compressor added the header in the returned data.

1486

return '', compressed

1488

return '', compressed

1487

1489

1488

if data[0] == '\0':

1490

if data[0] == '\0':

1489

return '', data

1491

return '', data

1490

return 'u', data

1492

return 'u', data

1491

1493

1492

def decompress(self, data):

1494

def decompress(self, data):

1493

"""Decompress a revlog chunk.

1495

"""Decompress a revlog chunk.

1494

1496

1495

The chunk is expected to begin with a header identifying the

1497

The chunk is expected to begin with a header identifying the

1496

format type so it can be routed to an appropriate decompressor.

1498

format type so it can be routed to an appropriate decompressor.

1497

"""

1499

"""

1498

if not data:

1500

if not data:

1499

return data

1501

return data

1500

1502

1501

# Revlogs are read much more frequently than they are written and many

1503

# Revlogs are read much more frequently than they are written and many

1502

# chunks only take microseconds to decompress, so performance is

1504

# chunks only take microseconds to decompress, so performance is

1503

# important here.

1505

# important here.

1504

#

1506

#

1505

# We can make a few assumptions about revlogs:

1507

# We can make a few assumptions about revlogs:

1506

#

1508

#

1507

# 1) the majority of chunks will be compressed (as opposed to inline

1509

# 1) the majority of chunks will be compressed (as opposed to inline

1508

# raw data).

1510

# raw data).

1509

# 2) decompressing *any* data will likely by at least 10x slower than

1511

# 2) decompressing *any* data will likely by at least 10x slower than

1510

# returning raw inline data.

1512

# returning raw inline data.

1511

# 3) we want to prioritize common and officially supported compression

1513

# 3) we want to prioritize common and officially supported compression

1512

# engines

1514

# engines

1513

#

1515

#

1514

# It follows that we want to optimize for "decompress compressed data

1516

# It follows that we want to optimize for "decompress compressed data

1515

# when encoded with common and officially supported compression engines"

1517

# when encoded with common and officially supported compression engines"

1516

# case over "raw data" and "data encoded by less common or non-official

1518

# case over "raw data" and "data encoded by less common or non-official

1517

# compression engines." That is why we have the inline lookup first

1519

# compression engines." That is why we have the inline lookup first

1518

# followed by the compengines lookup.

1520

# followed by the compengines lookup.

1519

#

1521

#

1520

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1522

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1521

# compressed chunks. And this matters for changelog and manifest reads.

1523

# compressed chunks. And this matters for changelog and manifest reads.

1522

t = data[0]

1524

t = data[0]

1523

1525

1524

if t == 'x':

1526

if t == 'x':

1525

try:

1527

try:

1526

return _zlibdecompress(data)

1528

return _zlibdecompress(data)

1527

except zlib.error as e:

1529

except zlib.error as e:

1528

raise RevlogError(_('revlog decompress error: %s') % str(e))

1530

raise RevlogError(_('revlog decompress error: %s') % str(e))

1529

# '\0' is more common than 'u' so it goes first.

1531

# '\0' is more common than 'u' so it goes first.

1530

elif t == '\0':

1532

elif t == '\0':

1531

return data

1533

return data

1532

elif t == 'u':

1534

elif t == 'u':

1533

return util.buffer(data, 1)

1535

return util.buffer(data, 1)

1534

1536

1535

try:

1537

try:

1536

compressor = self._decompressors[t]

1538

compressor = self._decompressors[t]

1537

except KeyError:

1539

except KeyError:

1538

try:

1540

try:

1539

engine = util.compengines.forrevlogheader(t)

1541

engine = util.compengines.forrevlogheader(t)

1540

compressor = engine.revlogcompressor()

1542

compressor = engine.revlogcompressor()

1541

self._decompressors[t] = compressor

1543

self._decompressors[t] = compressor

1542

except KeyError:

1544

except KeyError:

1543

raise RevlogError(_('unknown compression type %r') % t)

1545

raise RevlogError(_('unknown compression type %r') % t)

1544

1546

1545

return compressor.decompress(data)

1547

return compressor.decompress(data)

1546

1548

1547

def _isgooddelta(self, d, textlen):

1549

def _isgooddelta(self, d, textlen):

1548

"""Returns True if the given delta is good. Good means that it is within

1550

"""Returns True if the given delta is good. Good means that it is within

1549

the disk span, disk size, and chain length bounds that we know to be

1551

the disk span, disk size, and chain length bounds that we know to be

1550

performant."""

1552

performant."""

1551

if d is None:

1553

if d is None:

1552

return False

1554

return False

1553

1555

1554

# - 'dist' is the distance from the base revision -- bounding it limits

1556

# - 'dist' is the distance from the base revision -- bounding it limits

1555

# the amount of I/O we need to do.

1557

# the amount of I/O we need to do.

1556

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1558

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1557

# to apply -- bounding it limits the amount of CPU we consume.

1559

# to apply -- bounding it limits the amount of CPU we consume.

1558

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1560

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1559

if (dist > textlen * 4 or l > textlen or

1561

if (dist > textlen * 4 or l > textlen or

1560

compresseddeltalen > textlen * 2 or

1562

compresseddeltalen > textlen * 2 or

1561

(self._maxchainlen and chainlen > self._maxchainlen)):

1563

(self._maxchainlen and chainlen > self._maxchainlen)):

1562

return False

1564

return False

1563

1565

1564

return True

1566

return True

1565

1567

1566

def _addrevision(self, node, text, transaction, link, p1, p2, flags,

1568

def _addrevision(self, node, text, transaction, link, p1, p2, flags,

1567

cachedelta, ifh, dfh, alwayscache=False, raw=False):

1569

cachedelta, ifh, dfh, alwayscache=False, raw=False):

1568

"""internal function to add revisions to the log

1570

"""internal function to add revisions to the log

1569

1571

1570

see addrevision for argument descriptions.

1572

see addrevision for argument descriptions.

1571

invariants:

1573

invariants:

1572

- text is optional (can be None); if not set, cachedelta must be set.

1574

- text is optional (can be None); if not set, cachedelta must be set.

1573

if both are set, they must correspond to each other.

1575

if both are set, they must correspond to each other.

1574

- raw is optional; if set to True, it indicates the revision data is to

1576

- raw is optional; if set to True, it indicates the revision data is to

1575

be treated by _processflags() as raw. It is usually set by changegroup

1577

be treated by _processflags() as raw. It is usually set by changegroup

1576

generation and debug commands.

1578

generation and debug commands.

1577

"""

1579

"""

1578

btext = [text]

1580

btext = [text]

1579

def buildtext():

1581

def buildtext():

1580

if btext[0] is not None:

1582

if btext[0] is not None:

1581

return btext[0]

1583

return btext[0]

1582

baserev = cachedelta[0]

1584

baserev = cachedelta[0]

1583

delta = cachedelta[1]

1585

delta = cachedelta[1]

1584

# special case deltas which replace entire base; no need to decode

1586

# special case deltas which replace entire base; no need to decode

1585

# base revision. this neatly avoids censored bases, which throw when

1587

# base revision. this neatly avoids censored bases, which throw when

1586

# they're decoded.

1588

# they're decoded.

1587

hlen = struct.calcsize(">lll")

1589

hlen = struct.calcsize(">lll")

1588

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1590

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1589

len(delta) - hlen):

1591

len(delta) - hlen):

1590

btext[0] = delta[hlen:]

1592

btext[0] = delta[hlen:]

1591

else:

1593

else:

1592

if self._inline:

1594

if self._inline:

1593

fh = ifh

1595

fh = ifh

1594

else:

1596

else:

1595

fh = dfh

1597

fh = dfh

1596

basetext = self.revision(self.node(baserev), _df=fh, raw=raw)

1598

basetext = self.revision(self.node(baserev), _df=fh, raw=raw)

1597

btext[0] = mdiff.patch(basetext, delta)

1599

btext[0] = mdiff.patch(basetext, delta)

1598

1600

1599

try:

1601

try:

1600

res = self._processflags(btext[0], flags, 'read', raw=raw)

1602

res = self._processflags(btext[0], flags, 'read', raw=raw)

1601

btext[0], validatehash = res

1603

btext[0], validatehash = res

1602

if validatehash:

1604

if validatehash:

1603

self.checkhash(btext[0], node, p1=p1, p2=p2)

1605

self.checkhash(btext[0], node, p1=p1, p2=p2)

1604

if flags & REVIDX_ISCENSORED:

1606

if flags & REVIDX_ISCENSORED:

1605

raise RevlogError(_('node %s is not censored') % node)

1607

raise RevlogError(_('node %s is not censored') % node)

1606

except CensoredNodeError:

1608

except CensoredNodeError:

1607

# must pass the censored index flag to add censored revisions

1609

# must pass the censored index flag to add censored revisions

1608

if not flags & REVIDX_ISCENSORED:

1610

if not flags & REVIDX_ISCENSORED:

1609

raise

1611

raise

1610

return btext[0]

1612

return btext[0]

1611

1613

1612

def builddelta(rev):

1614

def builddelta(rev):

1613

# can we use the cached delta?

1615

# can we use the cached delta?

1614

if cachedelta and cachedelta[0] == rev:

1616

if cachedelta and cachedelta[0] == rev:

1615

delta = cachedelta[1]

1617

delta = cachedelta[1]

1616

else:

1618

else:

1617

t = buildtext()

1619

t = buildtext()

1618

if self.iscensored(rev):

1620

if self.iscensored(rev):

1619

# deltas based on a censored revision must replace the

1621

# deltas based on a censored revision must replace the

1620

# full content in one patch, so delta works everywhere

1622

# full content in one patch, so delta works everywhere

1621

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1623

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1622

delta = header + t

1624

delta = header + t

1623

else:

1625

else:

1624

if self._inline:

1626

if self._inline:

1625

fh = ifh

1627

fh = ifh

1626

else:

1628

else:

1627

fh = dfh

1629

fh = dfh

1628

ptext = self.revision(self.node(rev), _df=fh)

1630

ptext = self.revision(self.node(rev), _df=fh)

1629

delta = mdiff.textdiff(ptext, t)

1631

delta = mdiff.textdiff(ptext, t)

1630

header, data = self.compress(delta)

1632

header, data = self.compress(delta)

1631

deltalen = len(header) + len(data)

1633

deltalen = len(header) + len(data)

1632

chainbase = self.chainbase(rev)

1634

chainbase = self.chainbase(rev)

1633

dist = deltalen + offset - self.start(chainbase)

1635

dist = deltalen + offset - self.start(chainbase)

1634

if self._generaldelta:

1636

if self._generaldelta:

1635

base = rev

1637

base = rev

1636

else:

1638

else:

1637

base = chainbase

1639

base = chainbase

1638

chainlen, compresseddeltalen = self._chaininfo(rev)

1640

chainlen, compresseddeltalen = self._chaininfo(rev)

1639

chainlen += 1

1641

chainlen += 1

1640

compresseddeltalen += deltalen

1642

compresseddeltalen += deltalen

1641

return (dist, deltalen, (header, data), base,

1643

return (dist, deltalen, (header, data), base,

1642

chainbase, chainlen, compresseddeltalen)

1644

chainbase, chainlen, compresseddeltalen)

1643

1645

1644

curr = len(self)

1646

curr = len(self)

1645

prev = curr - 1

1647

prev = curr - 1

1646

offset = self.end(prev)

1648

offset = self.end(prev)

1647

delta = None

1649

delta = None

1648

p1r, p2r = self.rev(p1), self.rev(p2)

1650

p1r, p2r = self.rev(p1), self.rev(p2)

1649

1651

1650

# full versions are inserted when the needed deltas

1652

# full versions are inserted when the needed deltas

1651

# become comparable to the uncompressed text

1653

# become comparable to the uncompressed text

1652

if text is None:

1654

if text is None:

1653

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1655

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1654

cachedelta[1])

1656

cachedelta[1])

1655

else:

1657

else:

1656

textlen = len(text)

1658

textlen = len(text)

1657

1659

1658

# should we try to build a delta?

1660

# should we try to build a delta?

1659

if prev != nullrev and self.storedeltachains:

1661

if prev != nullrev and self.storedeltachains:

1660

tested = set()

1662

tested = set()

1661

# This condition is true most of the time when processing

1663

# This condition is true most of the time when processing

1662

# changegroup data into a generaldelta repo. The only time it

1664

# changegroup data into a generaldelta repo. The only time it

1663

# isn't true is if this is the first revision in a delta chain

1665

# isn't true is if this is the first revision in a delta chain

1664

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1666

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1665

if cachedelta and self._generaldelta and self._lazydeltabase:

1667

if cachedelta and self._generaldelta and self._lazydeltabase:

1666

# Assume what we received from the server is a good choice

1668

# Assume what we received from the server is a good choice

1667

# build delta will reuse the cache

1669

# build delta will reuse the cache

1668

candidatedelta = builddelta(cachedelta[0])

1670

candidatedelta = builddelta(cachedelta[0])

1669

tested.add(cachedelta[0])

1671

tested.add(cachedelta[0])

1670

if self._isgooddelta(candidatedelta, textlen):

1672

if self._isgooddelta(candidatedelta, textlen):

1671

delta = candidatedelta

1673

delta = candidatedelta

1672

if delta is None and self._generaldelta:

1674

if delta is None and self._generaldelta:

1673

# exclude already lazy tested base if any

1675

# exclude already lazy tested base if any

1674

parents = [p for p in (p1r, p2r)

1676

parents = [p for p in (p1r, p2r)

1675

if p != nullrev and p not in tested]

1677

if p != nullrev and p not in tested]

1676

if parents and not self._aggressivemergedeltas:

1678

if parents and not self._aggressivemergedeltas:

1677

# Pick whichever parent is closer to us (to minimize the

1679

# Pick whichever parent is closer to us (to minimize the

1678

# chance of having to build a fulltext).

1680

# chance of having to build a fulltext).

1679

parents = [max(parents)]

1681

parents = [max(parents)]

1680

tested.update(parents)

1682

tested.update(parents)

1681

pdeltas = []

1683

pdeltas = []

1682

for p in parents:

1684

for p in parents:

1683

pd = builddelta(p)

1685

pd = builddelta(p)

1684

if self._isgooddelta(pd, textlen):

1686

if self._isgooddelta(pd, textlen):

1685

pdeltas.append(pd)

1687

pdeltas.append(pd)

1686

if pdeltas:

1688

if pdeltas:

1687

delta = min(pdeltas, key=lambda x: x[1])

1689

delta = min(pdeltas, key=lambda x: x[1])

1688

if delta is None and prev not in tested:

1690

if delta is None and prev not in tested:

1689

# other approach failed try against prev to hopefully save us a

1691

# other approach failed try against prev to hopefully save us a

1690

# fulltext.

1692

# fulltext.

1691

candidatedelta = builddelta(prev)

1693

candidatedelta = builddelta(prev)

1692

if self._isgooddelta(candidatedelta, textlen):

1694

if self._isgooddelta(candidatedelta, textlen):

1693

delta = candidatedelta

1695

delta = candidatedelta

1694

if delta is not None:

1696

if delta is not None:

1695

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1697

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1696

else:

1698

else:

1697

text = buildtext()

1699

text = buildtext()

1698

data = self.compress(text)

1700

data = self.compress(text)

1699

l = len(data[1]) + len(data[0])

1701

l = len(data[1]) + len(data[0])

1700

base = chainbase = curr

1702

base = chainbase = curr

1701

1703

1702

e = (offset_type(offset, flags), l, textlen,

1704

e = (offset_type(offset, flags), l, textlen,

1703

base, link, p1r, p2r, node)

1705

base, link, p1r, p2r, node)

1704

self.index.insert(-1, e)

1706

self.index.insert(-1, e)

1705

self.nodemap[node] = curr

1707

self.nodemap[node] = curr

1706

1708

1707

entry = self._io.packentry(e, self.node, self.version, curr)

1709

entry = self._io.packentry(e, self.node, self.version, curr)

1708

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1710

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1709

1711

1710

if alwayscache and text is None:

1712

if alwayscache and text is None:

1711

text = buildtext()

1713

text = buildtext()

1712

1714

1713

if type(text) == str: # only accept immutable objects

1715

if type(text) == str: # only accept immutable objects

1714

self._cache = (node, curr, text)

1716

self._cache = (node, curr, text)

1715

self._chainbasecache[curr] = chainbase

1717

self._chainbasecache[curr] = chainbase

1716

return node

1718

return node

1717

1719

1718

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1720

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1719

# Files opened in a+ mode have inconsistent behavior on various

1721

# Files opened in a+ mode have inconsistent behavior on various

1720

# platforms. Windows requires that a file positioning call be made

1722

# platforms. Windows requires that a file positioning call be made

1721

# when the file handle transitions between reads and writes. See

1723

# when the file handle transitions between reads and writes. See

1722

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1724

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1723

# platforms, Python or the platform itself can be buggy. Some versions

1725

# platforms, Python or the platform itself can be buggy. Some versions

1724

# of Solaris have been observed to not append at the end of the file

1726

# of Solaris have been observed to not append at the end of the file

1725

# if the file was seeked to before the end. See issue4943 for more.

1727

# if the file was seeked to before the end. See issue4943 for more.

1726

#

1728

#

1727

# We work around this issue by inserting a seek() before writing.

1729

# We work around this issue by inserting a seek() before writing.

1728

# Note: This is likely not necessary on Python 3.

1730

# Note: This is likely not necessary on Python 3.

1729

ifh.seek(0, os.SEEK_END)

1731

ifh.seek(0, os.SEEK_END)

1730

if dfh:

1732

if dfh:

1731

dfh.seek(0, os.SEEK_END)

1733

dfh.seek(0, os.SEEK_END)

1732

1734

1733

curr = len(self) - 1

1735

curr = len(self) - 1

1734

if not self._inline:

1736

if not self._inline:

1735

transaction.add(self.datafile, offset)

1737

transaction.add(self.datafile, offset)

1736

transaction.add(self.indexfile, curr * len(entry))

1738

transaction.add(self.indexfile, curr * len(entry))

1737

if data[0]:

1739

if data[0]:

1738

dfh.write(data[0])

1740

dfh.write(data[0])

1739

dfh.write(data[1])

1741

dfh.write(data[1])

1740

ifh.write(entry)

1742

ifh.write(entry)

1741

else:

1743

else:

1742

offset += curr * self._io.size

1744

offset += curr * self._io.size

1743

transaction.add(self.indexfile, offset, curr)

1745

transaction.add(self.indexfile, offset, curr)

1744

ifh.write(entry)

1746

ifh.write(entry)

1745

ifh.write(data[0])

1747

ifh.write(data[0])

1746

ifh.write(data[1])

1748

ifh.write(data[1])

1747

self.checkinlinesize(transaction, ifh)

1749

self.checkinlinesize(transaction, ifh)

1748

1750

1749

def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):

1751

def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):

1750

"""

1752

"""

1751

add a delta group

1753

add a delta group

1752

1754

1753

given a set of deltas, add them to the revision log. the

1755

given a set of deltas, add them to the revision log. the

1754

first delta is against its parent, which should be in our

1756

first delta is against its parent, which should be in our

1755

log, the rest are against the previous delta.

1757

log, the rest are against the previous delta.

1756

1758

1757

If ``addrevisioncb`` is defined, it will be called with arguments of

1759

If ``addrevisioncb`` is defined, it will be called with arguments of

1758

this revlog and the node that was added.

1760

this revlog and the node that was added.

1759

"""

1761

"""

1760

1762

1761

# track the base of the current delta log

1763

# track the base of the current delta log

1762

content = []

1764

content = []

1763

node = None

1765

node = None

1764

1766

1765

r = len(self)

1767

r = len(self)

1766

end = 0

1768

end = 0

1767

if r:

1769

if r:

1768

end = self.end(r - 1)

1770

end = self.end(r - 1)

1769

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1771

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1770

isize = r * self._io.size

1772

isize = r * self._io.size

1771

if self._inline:

1773

if self._inline:

1772

transaction.add(self.indexfile, end + isize, r)

1774

transaction.add(self.indexfile, end + isize, r)

1773

dfh = None

1775

dfh = None

1774

else:

1776

else:

1775

transaction.add(self.indexfile, isize, r)

1777

transaction.add(self.indexfile, isize, r)

1776

transaction.add(self.datafile, end)

1778

transaction.add(self.datafile, end)

1777

dfh = self.opener(self.datafile, "a+")

1779

dfh = self.opener(self.datafile, "a+")

1778

def flush():

1780

def flush():

1779

if dfh:

1781

if dfh:

1780

dfh.flush()

1782

dfh.flush()

1781

ifh.flush()

1783

ifh.flush()

1782

try:

1784

try:

1783

# loop through our set of deltas

1785

# loop through our set of deltas

1784

chain = None

1786

chain = None

1785

for chunkdata in iter(lambda: cg.deltachunk(chain), {}):

1787

for chunkdata in iter(lambda: cg.deltachunk(chain), {}):

1786

node = chunkdata['node']

1788

node = chunkdata['node']

1787

p1 = chunkdata['p1']

1789

p1 = chunkdata['p1']

1788

p2 = chunkdata['p2']

1790

p2 = chunkdata['p2']

1789

cs = chunkdata['cs']

1791

cs = chunkdata['cs']

1790

deltabase = chunkdata['deltabase']

1792

deltabase = chunkdata['deltabase']

1791

delta = chunkdata['delta']

1793

delta = chunkdata['delta']

1792

flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

1794

flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

1793

1795

1794

content.append(node)

1796

content.append(node)

1795

1797

1796

link = linkmapper(cs)

1798

link = linkmapper(cs)

1797

if node in self.nodemap:

1799

if node in self.nodemap:

1798

# this can happen if two branches make the same change

1800

# this can happen if two branches make the same change

1799

chain = node

1801

chain = node

1800

continue

1802

continue

1801

1803

1802

for p in (p1, p2):

1804

for p in (p1, p2):

1803

if p not in self.nodemap:

1805

if p not in self.nodemap:

1804

raise LookupError(p, self.indexfile,

1806

raise LookupError(p, self.indexfile,

1805

_('unknown parent'))

1807

_('unknown parent'))

1806

1808

1807

if deltabase not in self.nodemap:

1809

if deltabase not in self.nodemap:

1808

raise LookupError(deltabase, self.indexfile,

1810

raise LookupError(deltabase, self.indexfile,

1809

_('unknown delta base'))

1811

_('unknown delta base'))

1810

1812

1811

baserev = self.rev(deltabase)

1813

baserev = self.rev(deltabase)

1812

1814

1813

if baserev != nullrev and self.iscensored(baserev):

1815

if baserev != nullrev and self.iscensored(baserev):

1814

# if base is censored, delta must be full replacement in a

1816

# if base is censored, delta must be full replacement in a

1815

# single patch operation

1817

# single patch operation

1816

hlen = struct.calcsize(">lll")

1818

hlen = struct.calcsize(">lll")

1817

oldlen = self.rawsize(baserev)

1819

oldlen = self.rawsize(baserev)

1818

newlen = len(delta) - hlen

1820

newlen = len(delta) - hlen

1819

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

1821

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

1820

raise error.CensoredBaseError(self.indexfile,

1822

raise error.CensoredBaseError(self.indexfile,

1821

self.node(baserev))

1823

self.node(baserev))

1822

1824

1823

if not flags and self._peek_iscensored(baserev, delta, flush):

1825

if not flags and self._peek_iscensored(baserev, delta, flush):

1824

flags |= REVIDX_ISCENSORED

1826

flags |= REVIDX_ISCENSORED

1825

1827

1826

# We assume consumers of addrevisioncb will want to retrieve

1828

# We assume consumers of addrevisioncb will want to retrieve

1827

# the added revision, which will require a call to

1829

# the added revision, which will require a call to

1828

# revision(). revision() will fast path if there is a cache

1830

# revision(). revision() will fast path if there is a cache

1829

# hit. So, we tell _addrevision() to always cache in this case.

1831

# hit. So, we tell _addrevision() to always cache in this case.

1830

# We're only using addgroup() in the context of changegroup

1832

# We're only using addgroup() in the context of changegroup

1831

# generation so the revision data can always be handled as raw

1833

# generation so the revision data can always be handled as raw

1832

# by the flagprocessor.

1834

# by the flagprocessor.

1833

chain = self._addrevision(node, None, transaction, link,

1835

chain = self._addrevision(node, None, transaction, link,

1834

p1, p2, flags, (baserev, delta),

1836

p1, p2, flags, (baserev, delta),

1835

ifh, dfh,

1837

ifh, dfh,

1836

alwayscache=bool(addrevisioncb),

1838

alwayscache=bool(addrevisioncb),

1837

raw=True)

1839

raw=True)

1838

1840

1839

if addrevisioncb:

1841

if addrevisioncb:

1840

addrevisioncb(self, chain)

1842

addrevisioncb(self, chain)

1841

1843

1842

if not dfh and not self._inline:

1844

if not dfh and not self._inline:

1843

# addrevision switched from inline to conventional

1845

# addrevision switched from inline to conventional

1844

# reopen the index

1846

# reopen the index

1845

ifh.close()

1847

ifh.close()

1846

dfh = self.opener(self.datafile, "a+")

1848

dfh = self.opener(self.datafile, "a+")

1847

ifh = self.opener(self.indexfile, "a+",

1849

ifh = self.opener(self.indexfile, "a+",

1848

checkambig=self._checkambig)

1850

checkambig=self._checkambig)

1849

finally:

1851

finally:

1850

if dfh:

1852

if dfh:

1851

dfh.close()

1853

dfh.close()

1852

ifh.close()

1854

ifh.close()

1853

1855

1854

return content

1856

return content

1855

1857

1856

def iscensored(self, rev):

1858

def iscensored(self, rev):

1857

"""Check if a file revision is censored."""

1859

"""Check if a file revision is censored."""

1858

return False

1860

return False

1859

1861

1860

def _peek_iscensored(self, baserev, delta, flush):

1862

def _peek_iscensored(self, baserev, delta, flush):

1861

"""Quickly check if a delta produces a censored revision."""

1863

"""Quickly check if a delta produces a censored revision."""

1862

return False

1864

return False

1863

1865

1864

def getstrippoint(self, minlink):

1866

def getstrippoint(self, minlink):

1865

"""find the minimum rev that must be stripped to strip the linkrev

1867

"""find the minimum rev that must be stripped to strip the linkrev

1866

1868

1867

Returns a tuple containing the minimum rev and a set of all revs that

1869

Returns a tuple containing the minimum rev and a set of all revs that

1868

have linkrevs that will be broken by this strip.

1870

have linkrevs that will be broken by this strip.

1869

"""

1871

"""

1870

brokenrevs = set()

1872

brokenrevs = set()

1871

strippoint = len(self)

1873

strippoint = len(self)

1872

1874

1873

heads = {}

1875

heads = {}

1874

futurelargelinkrevs = set()

1876

futurelargelinkrevs = set()

1875

for head in self.headrevs():

1877

for head in self.headrevs():

1876

headlinkrev = self.linkrev(head)

1878

headlinkrev = self.linkrev(head)

1877

heads[head] = headlinkrev

1879

heads[head] = headlinkrev

1878

if headlinkrev >= minlink:

1880

if headlinkrev >= minlink:

1879

futurelargelinkrevs.add(headlinkrev)

1881

futurelargelinkrevs.add(headlinkrev)

1880

1882

1881

# This algorithm involves walking down the rev graph, starting at the

1883

# This algorithm involves walking down the rev graph, starting at the

1882

# heads. Since the revs are topologically sorted according to linkrev,

1884

# heads. Since the revs are topologically sorted according to linkrev,

1883

# once all head linkrevs are below the minlink, we know there are

1885

# once all head linkrevs are below the minlink, we know there are

1884

# no more revs that could have a linkrev greater than minlink.

1886

# no more revs that could have a linkrev greater than minlink.

1885

# So we can stop walking.

1887

# So we can stop walking.

1886

while futurelargelinkrevs:

1888

while futurelargelinkrevs:

1887

strippoint -= 1

1889

strippoint -= 1

1888

linkrev = heads.pop(strippoint)

1890

linkrev = heads.pop(strippoint)

1889

1891

1890

if linkrev < minlink:

1892

if linkrev < minlink:

1891

brokenrevs.add(strippoint)

1893

brokenrevs.add(strippoint)

1892

else:

1894

else:

1893

futurelargelinkrevs.remove(linkrev)

1895

futurelargelinkrevs.remove(linkrev)

1894

1896

1895

for p in self.parentrevs(strippoint):

1897

for p in self.parentrevs(strippoint):

1896

if p != nullrev:

1898

if p != nullrev:

1897

plinkrev = self.linkrev(p)

1899

plinkrev = self.linkrev(p)

1898

heads[p] = plinkrev

1900

heads[p] = plinkrev

1899

if plinkrev >= minlink:

1901

if plinkrev >= minlink:

1900

futurelargelinkrevs.add(plinkrev)

1902

futurelargelinkrevs.add(plinkrev)

1901

1903

1902

return strippoint, brokenrevs

1904

return strippoint, brokenrevs

1903

1905

1904

def strip(self, minlink, transaction):

1906

def strip(self, minlink, transaction):

1905

"""truncate the revlog on the first revision with a linkrev >= minlink

1907

"""truncate the revlog on the first revision with a linkrev >= minlink

1906

1908

1907

This function is called when we're stripping revision minlink and

1909

This function is called when we're stripping revision minlink and

1908

its descendants from the repository.

1910

its descendants from the repository.

1909

1911

1910

We have to remove all revisions with linkrev >= minlink, because

1912

We have to remove all revisions with linkrev >= minlink, because

1911

the equivalent changelog revisions will be renumbered after the

1913

the equivalent changelog revisions will be renumbered after the

1912

strip.

1914

strip.

1913

1915

1914

So we truncate the revlog on the first of these revisions, and

1916

So we truncate the revlog on the first of these revisions, and

1915

trust that the caller has saved the revisions that shouldn't be

1917

trust that the caller has saved the revisions that shouldn't be

1916

removed and that it'll re-add them after this truncation.

1918

removed and that it'll re-add them after this truncation.

1917

"""

1919

"""

1918

if len(self) == 0:

1920

if len(self) == 0:

1919

return

1921

return

1920

1922

1921

rev, _ = self.getstrippoint(minlink)

1923

rev, _ = self.getstrippoint(minlink)

1922

if rev == len(self):

1924

if rev == len(self):

1923

return

1925

return

1924

1926

1925

# first truncate the files on disk

1927

# first truncate the files on disk

1926

end = self.start(rev)

1928

end = self.start(rev)

1927

if not self._inline:

1929

if not self._inline:

1928

transaction.add(self.datafile, end)

1930

transaction.add(self.datafile, end)

1929

end = rev * self._io.size

1931

end = rev * self._io.size

1930

else:

1932

else:

1931

end += rev * self._io.size

1933

end += rev * self._io.size

1932

1934

1933

transaction.add(self.indexfile, end)

1935

transaction.add(self.indexfile, end)

1934

1936

1935

# then reset internal state in memory to forget those revisions

1937

# then reset internal state in memory to forget those revisions

1936

self._cache = None

1938

self._cache = None

1937

self._chaininfocache = {}

1939

self._chaininfocache = {}

1938

self._chunkclear()

1940

self._chunkclear()

1939

for x in xrange(rev, len(self)):

1941

for x in xrange(rev, len(self)):

1940

del self.nodemap[self.node(x)]

1942

del self.nodemap[self.node(x)]

1941

1943

1942

del self.index[rev:-1]

1944

del self.index[rev:-1]

1943

1945

1944

def checksize(self):

1946

def checksize(self):

1945

expected = 0

1947

expected = 0

1946

if len(self):

1948

if len(self):

1947

expected = max(0, self.end(len(self) - 1))

1949

expected = max(0, self.end(len(self) - 1))

1948

1950

1949

try:

1951

try:

1950

f = self.opener(self.datafile)

1952

f = self.opener(self.datafile)

1951

f.seek(0, 2)

1953

f.seek(0, 2)

1952

actual = f.tell()

1954

actual = f.tell()

1953

f.close()

1955

f.close()

1954

dd = actual - expected

1956

dd = actual - expected

1955

except IOError as inst:

1957

except IOError as inst:

1956

if inst.errno != errno.ENOENT:

1958

if inst.errno != errno.ENOENT:

1957

raise

1959

raise

1958

dd = 0

1960

dd = 0

1959

1961

1960

try:

1962

try:

1961

f = self.opener(self.indexfile)

1963

f = self.opener(self.indexfile)

1962

f.seek(0, 2)

1964

f.seek(0, 2)

1963

actual = f.tell()

1965

actual = f.tell()

1964

f.close()

1966

f.close()

1965

s = self._io.size

1967

s = self._io.size

1966

i = max(0, actual // s)

1968

i = max(0, actual // s)

1967

di = actual - (i * s)

1969

di = actual - (i * s)

1968

if self._inline:

1970

if self._inline:

1969

databytes = 0

1971

databytes = 0

1970

for r in self:

1972

for r in self:

1971

databytes += max(0, self.length(r))

1973

databytes += max(0, self.length(r))

1972

dd = 0

1974

dd = 0

1973

di = actual - len(self) * s - databytes

1975

di = actual - len(self) * s - databytes

1974

except IOError as inst:

1976

except IOError as inst:

1975

if inst.errno != errno.ENOENT:

1977

if inst.errno != errno.ENOENT:

1976

raise

1978

raise

1977

di = 0

1979

di = 0

1978

1980

1979

return (dd, di)

1981

return (dd, di)

1980

1982

1981

def files(self):

1983

def files(self):

1982

res = [self.indexfile]

1984

res = [self.indexfile]

1983

if not self._inline:

1985

if not self._inline:

1984

res.append(self.datafile)

1986

res.append(self.datafile)

1985

return res

1987

return res

1986

1988

1987

DELTAREUSEALWAYS = 'always'

1989

DELTAREUSEALWAYS = 'always'

1988

DELTAREUSESAMEREVS = 'samerevs'

1990

DELTAREUSESAMEREVS = 'samerevs'

1989

DELTAREUSENEVER = 'never'

1991

DELTAREUSENEVER = 'never'

1990

1992

1991

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

1993

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

1992

1994

1993

def clone(self, tr, destrevlog, addrevisioncb=None,

1995

def clone(self, tr, destrevlog, addrevisioncb=None,

1994

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

1996

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

1995

"""Copy this revlog to another, possibly with format changes.

1997

"""Copy this revlog to another, possibly with format changes.

1996

1998

1997

The destination revlog will contain the same revisions and nodes.

1999

The destination revlog will contain the same revisions and nodes.

1998

However, it may not be bit-for-bit identical due to e.g. delta encoding

2000

However, it may not be bit-for-bit identical due to e.g. delta encoding

1999

differences.

2001

differences.

2000

2002

2001

The ``deltareuse`` argument control how deltas from the existing revlog

2003

The ``deltareuse`` argument control how deltas from the existing revlog

2002

are preserved in the destination revlog. The argument can have the

2004

are preserved in the destination revlog. The argument can have the

2003

following values:

2005

following values:

2004

2006

2005

DELTAREUSEALWAYS

2007

DELTAREUSEALWAYS

2006

Deltas will always be reused (if possible), even if the destination

2008

Deltas will always be reused (if possible), even if the destination

2007

revlog would not select the same revisions for the delta. This is the

2009

revlog would not select the same revisions for the delta. This is the

2008

fastest mode of operation.

2010

fastest mode of operation.

2009

DELTAREUSESAMEREVS

2011

DELTAREUSESAMEREVS

2010

Deltas will be reused if the destination revlog would pick the same

2012

Deltas will be reused if the destination revlog would pick the same

2011

revisions for the delta. This mode strikes a balance between speed

2013

revisions for the delta. This mode strikes a balance between speed

2012

and optimization.

2014

and optimization.

2013

DELTAREUSENEVER

2015

DELTAREUSENEVER

2014

Deltas will never be reused. This is the slowest mode of execution.

2016

Deltas will never be reused. This is the slowest mode of execution.

2015

This mode can be used to recompute deltas (e.g. if the diff/delta

2017

This mode can be used to recompute deltas (e.g. if the diff/delta

2016

algorithm changes).

2018

algorithm changes).

2017

2019

2018

Delta computation can be slow, so the choice of delta reuse policy can

2020

Delta computation can be slow, so the choice of delta reuse policy can

2019

significantly affect run time.

2021

significantly affect run time.

2020

2022

2021

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2023

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2022

two extremes. Deltas will be reused if they are appropriate. But if the

2024

two extremes. Deltas will be reused if they are appropriate. But if the

2023

delta could choose a better revision, it will do so. This means if you

2025

delta could choose a better revision, it will do so. This means if you

2024

are converting a non-generaldelta revlog to a generaldelta revlog,

2026

are converting a non-generaldelta revlog to a generaldelta revlog,

2025

deltas will be recomputed if the delta's parent isn't a parent of the

2027

deltas will be recomputed if the delta's parent isn't a parent of the

2026

revision.

2028

revision.

2027

2029

2028

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2030

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2029

controls whether to compute deltas against both parents for merges.

2031

controls whether to compute deltas against both parents for merges.

2030

By default, the current default is used.

2032

By default, the current default is used.

2031

"""

2033

"""

2032

if deltareuse not in self.DELTAREUSEALL:

2034

if deltareuse not in self.DELTAREUSEALL:

2033

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2035

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2034

2036

2035

if len(destrevlog):

2037

if len(destrevlog):

2036

raise ValueError(_('destination revlog is not empty'))

2038

raise ValueError(_('destination revlog is not empty'))

2037

2039

2038

if getattr(self, 'filteredrevs', None):

2040

if getattr(self, 'filteredrevs', None):

2039

raise ValueError(_('source revlog has filtered revisions'))

2041

raise ValueError(_('source revlog has filtered revisions'))

2040

if getattr(destrevlog, 'filteredrevs', None):

2042

if getattr(destrevlog, 'filteredrevs', None):

2041

raise ValueError(_('destination revlog has filtered revisions'))

2043

raise ValueError(_('destination revlog has filtered revisions'))

2042

2044

2043

# lazydeltabase controls whether to reuse a cached delta, if possible.

2045

# lazydeltabase controls whether to reuse a cached delta, if possible.

2044

oldlazydeltabase = destrevlog._lazydeltabase

2046

oldlazydeltabase = destrevlog._lazydeltabase

2045

oldamd = destrevlog._aggressivemergedeltas

2047

oldamd = destrevlog._aggressivemergedeltas

2046

2048

2047

try:

2049

try:

2048

if deltareuse == self.DELTAREUSEALWAYS:

2050

if deltareuse == self.DELTAREUSEALWAYS:

2049

destrevlog._lazydeltabase = True

2051

destrevlog._lazydeltabase = True

2050

elif deltareuse == self.DELTAREUSESAMEREVS:

2052

elif deltareuse == self.DELTAREUSESAMEREVS:

2051

destrevlog._lazydeltabase = False

2053

destrevlog._lazydeltabase = False

2052

2054

2053

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2055

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2054

2056

2055

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2057

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2056

self.DELTAREUSESAMEREVS)

2058

self.DELTAREUSESAMEREVS)

2057

2059

2058

index = self.index

2060

index = self.index

2059

for rev in self:

2061

for rev in self:

2060

entry = index[rev]

2062

entry = index[rev]

2061

2063

2062

# Some classes override linkrev to take filtered revs into

2064

# Some classes override linkrev to take filtered revs into

2063

# account. Use raw entry from index.

2065

# account. Use raw entry from index.

2064

flags = entry[0] & 0xffff

2066

flags = entry[0] & 0xffff

2065

linkrev = entry[4]

2067

linkrev = entry[4]

2066

p1 = index[entry[5]][7]

2068

p1 = index[entry[5]][7]

2067

p2 = index[entry[6]][7]

2069

p2 = index[entry[6]][7]

2068

node = entry[7]

2070

node = entry[7]

2069

2071

2070

# (Possibly) reuse the delta from the revlog if allowed and

2072

# (Possibly) reuse the delta from the revlog if allowed and

2071

# the revlog chunk is a delta.

2073

# the revlog chunk is a delta.

2072

cachedelta = None

2074

cachedelta = None

2073

text = None

2075

text = None

2074

if populatecachedelta:

2076

if populatecachedelta:

2075

dp = self.deltaparent(rev)

2077

dp = self.deltaparent(rev)

2076

if dp != nullrev:

2078

if dp != nullrev:

2077

cachedelta = (dp, str(self._chunk(rev)))

2079

cachedelta = (dp, str(self._chunk(rev)))

2078

2080

2079

if not cachedelta:

2081

if not cachedelta:

2080

text = self.revision(rev)

2082

text = self.revision(rev)

2081

2083

2082

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2084

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2083

checkambig=False)

2085

checkambig=False)

2084

dfh = None

2086

dfh = None

2085

if not destrevlog._inline:

2087

if not destrevlog._inline:

2086

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2088

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2087

try:

2089

try:

2088

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2090

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2089

flags, cachedelta, ifh, dfh)

2091

flags, cachedelta, ifh, dfh)

2090

finally:

2092

finally:

2091

if dfh:

2093

if dfh:

2092

dfh.close()

2094

dfh.close()

2093

ifh.close()

2095

ifh.close()

2094

2096

2095

if addrevisioncb:

2097

if addrevisioncb:

2096

addrevisioncb(self, rev, node)

2098

addrevisioncb(self, rev, node)

2097

finally:

2099

finally:

2098

destrevlog._lazydeltabase = oldlazydeltabase

2100

destrevlog._lazydeltabase = oldlazydeltabase

2099

destrevlog._aggressivemergedeltas = oldamd

2101

destrevlog._aggressivemergedeltas = oldamd

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             Revision logs - or *revlogs* - are an append only data structure for
             storing discrete entries, or *revisions*. They are the primary storage
             mechanism of repository data.
             Revlogs effectively model a directed acyclic graph (DAG). Each node
             has edges to 1 or 2 *parent* nodes. Each node contains metadata and
             the raw value for that node.
             Revlogs consist of entries which have metadata and revision data.
             Metadata includes the hash of the revision's content, sizes, and
             links to its *parent* entries. The collective metadata is referred
             to as the *index* and the revision data is the *data*.
             Revision data is stored as a series of compressed deltas against previous
             revisions.
             Revlogs are written in an append-only fashion. We never need to rewrite
             a file to insert nor do we need to remove data. Rolling back in-progress
             writes can be performed by truncating files. Read locks can be avoided
             using simple techniques. This means that references to other data in
             the same revlog *always* refer to a previous entry.
             Revlogs can be modeled as 0-indexed arrays. The first revision is
             revision #0 and the second is revision #1. The revision -1 is typically
             used to mean *does not exist* or *not defined*.
             File Format
             ===========
             A revlog begins with a 32-bit big endian integer holding version info
             and feature flags. This integer is shared with the first revision
             entry.
             This integer is logically divided into 2 16-bit shorts. The least
             significant half of the integer is the format/version short. The other
             short holds feature flags that dictate behavior of the revlog.
             Only 1 bit of the format/version short is currently used. Remaining
             bits are reserved for future use.
             The following values for the format/version short are defined:
                The original revlog version.
                RevlogNG (*next generation*). It replaced version 0 when it was
                implemented in 2006.
             The feature flags short consists of bit flags. Where 0 is the least
             significant bit, the following bit offsets define flags:
                Store revision data inline.
                Generaldelta encoding.
 -15
                Reserved for future use.
             The following header values are common:
 00 00 01
                RevlogNG
 01 00 01
                RevlogNG + inline
 02 00 01
                RevlogNG + generaldelta
 03 00 01
                RevlogNG + inline + generaldelta
             Following the 32-bit header is the remainder of the first index entry.
             Following that are remaining *index* data. Inlined revision data is
             possibly located between index entries. More on this layout is described
             below.
             RevlogNG Format
             ===============
             RevlogNG (version 1) begins with an index describing the revisions in
             the revlog. If the ``inline`` flag is set, revision data is stored inline,
             or between index entries (as opposed to in a separate container).
             Each index entry is 64 bytes. The byte layout of each entry is as
             follows, with byte 0 being the first byte (all data stored as big endian):
 -3 (4 bytes) (rev 0 only)
                Revlog header
 -5 (6 bytes)
                Absolute offset of revision data from beginning of revlog.
 -7 (2 bytes)
                Bit flags impacting revision behavior. The following bit offsets define:
 : REVIDX_ISCENSORED revision has censor metadata, must be verified.
-: REVIDX_EXTSTORED revision data is stored externally.
+: REVIDX_ELLIPSIS revision hash does not match its data. Used by
+               narrowhg
+: REVIDX_EXTSTORED revision data is stored externally.
 -11 (4 bytes)
                Compressed length of revision data / chunk as stored in revlog.
 -15 (4 bytes)
                Uncompressed length of revision data. This is the size of the full
                revision data, not the size of the chunk post decompression.
 -19 (4 bytes)
                Base or previous revision this revision's delta was produced against.
                -1 means this revision holds full text (as opposed to a delta).
                For generaldelta repos, this is the previous revision in the delta
                chain. For non-generaldelta repos, this is the base or first
                revision in the delta chain.
 -23 (4 bytes)
                A revision this revision is *linked* to. This allows a revision in
                one revlog to be forever associated with a revision in another
                revlog. For example, a file's revlog may point to the changelog
                revision that introduced it.
 -27 (4 bytes)
                Revision of 1st parent. -1 indicates no parent.
 -31 (4 bytes)
                Revision of 2nd parent. -1 indicates no 2nd parent.
 -63 (32 bytes)
                Hash of revision's full text. Currently, SHA-1 is used and only
                the first 20 bytes of this field are used. The rest of the bytes
                are ignored and should be stored as \0.
             If inline revision data is being stored, the compressed revision data
             (of length from bytes offset 8-11 from the index entry) immediately
             follows the index entry. There is no header on the revision data. There
             is no padding between it and the index entries before and after.
             If revision data is not inline, then raw revision data is stored in a
             separate byte container. The offsets from bytes 0-5 and the compressed
             length from bytes 8-11 define how to access this data.
             The first 4 bytes of the revlog are shared between the revlog header
             and the 6 byte absolute offset field from the first revlog entry.
             Delta Chains
             ============
             Revision data is encoded as a chain of *chunks*. Each chain begins with
             the compressed original full text for that revision. Each subsequent
             *chunk* is a *delta* against the previous revision. We therefore call
             these chains of chunks/deltas *delta chains*.
             The full text for a revision is reconstructed by loading the original
             full text for the base revision of a *delta chain* and then applying
             *deltas* until the target revision is reconstructed.
             *Delta chains* are limited in length so lookup time is bound. They are
             limited to ~2x the length of the revision's data. The linear distance
             between the base chunk and the final chunk is also limited so the
             amount of read I/O to load all chunks in the delta chain is bound.
             Deltas and delta chains are either computed against the previous
             revision in the revlog or another revision (almost certainly one of
             the parents of the revision). Historically, deltas were computed against
             the previous revision. The *generaldelta* revlog feature flag (enabled
             by default in Mercurial 3.7) activates the mode where deltas are
             computed against an arbitrary revision (almost certainly a parent revision).
             File Storage
             ============
             Revlogs logically consist of an index (metadata of entries) and
             revision data. This data may be stored together in a single file or in
             separate files. The mechanism used is indicated by the ``inline`` feature
             flag on the revlog.
             Mercurial's behavior is to use inline storage until a revlog reaches a
             certain size, at which point it will be converted to non-inline. The
             reason there is a size limit on inline storage is to establish an upper
             bound on how much data must be read to load the index. It would be a waste
             to read tens or hundreds of extra megabytes of data just to access the
             index data.
             The actual layout of revlog files on disk is governed by the repository's
             *store format*. Typically, a ``.i`` file represents the index revlog
             (possibly containing inline data) and a ``.d`` file holds the revision data.
             Revision Entries
             ================
             Revision entries consist of an optional 1 byte header followed by an
             encoding of the revision data. The headers are as follows:
             \0 (0x00)
                Revision data is the entirety of the entry, including this header.
             u (0x75)
                Raw revision data follows.
             x (0x78)
                zlib (RFC 1950) data.
                The 0x78 value is actually the first byte of the zlib header (CMF byte).
             Hash Computation
             ================
             The hash of the revision is stored in the index and is used both as a primary
             key and for data integrity verification.
             Currently, SHA-1 is the only supported hashing algorithm. To obtain the SHA-1
             hash of a revision:
 . Hash the parent nodes
 . Hash the fulltext of the revision
             The 20 byte node ids of the parents are fed into the hasher in ascending order.

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import collections
             import errno
             import hashlib
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullid,
                 nullrev,
             )
             from .i18n import _
             from . import (
                 ancestor,
                 error,
                 mdiff,
                 parsers,
                 templatefilters,
                 util,
             )
             _pack = struct.pack
             _unpack = struct.unpack
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # revlog header flags
             REVLOGV0 = 0
             REVLOGNG = 1
             REVLOGNGINLINEDATA = (1 << 16)
             REVLOGGENERALDELTA = (1 << 17)
             REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
             REVLOG_DEFAULT_FORMAT = REVLOGNG
             REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
             REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
             # revlog index flags
             REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
-            REVIDX_EXTSTORED = (1 << 14) # revision data is stored externally
+            REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
+            REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
             REVIDX_DEFAULT_FLAGS = 0
             # stable order in which flags need to be processed and their processors applied
             REVIDX_FLAGS_ORDER = [
                 REVIDX_ISCENSORED,
+                REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
             ]
             REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
             # max size of revlog with inline data
             _maxinline = 131072
             _chunksize = 1048576
             RevlogError = error.RevlogError
             LookupError = error.LookupError
             CensoredNodeError = error.CensoredNodeError
             ProgrammingError = error.ProgrammingError
             # Store flag processors (cf. 'addflagprocessor()' to register)
             _flagprocessors = {
                 REVIDX_ISCENSORED: None,
             }
             def addflagprocessor(flag, processor):
                 """Register a flag processor on a revision data flag.
                 Invariant:
                 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
                 - Only one flag processor can be registered on a specific flag.
                 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
                   following signatures:
                       - (read)  f(self, text) -> newtext, bool
                       - (write) f(self, text) -> newtext, bool
                       - (raw)   f(self, text) -> bool
                   The boolean returned by these transforms is used to determine whether
                   'newtext' can be used for hash integrity checking.
                   Note: The 'raw' transform is used for changegroup generation and in some
                   debug commands. In this case the transform only indicates whether the
                   contents can be used for hash integrity checks.
                 """
                 if not flag & REVIDX_KNOWN_FLAGS:
                     msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
                     raise ProgrammingError(msg)
                 if flag not in REVIDX_FLAGS_ORDER:
                     msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
                     raise ProgrammingError(msg)
                 if flag in _flagprocessors:
                     msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
                     raise error.Abort(msg)
                 _flagprocessors[flag] = processor
             def getoffset(q):
                 return int(q >> 16)
             def gettype(q):
                 return int(q & 0xFFFF)
             def offset_type(offset, type):
                 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
                     raise ValueError('unknown revlog index flags')
                 return long(long(offset) << 16 | type)
             _nullhash = hashlib.sha1(nullid)
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 # As of now, if one of the parent node is null, p2 is null
                 if p2 == nullid:
                     # deep copy of a hash is faster than creating one
                     s = _nullhash.copy()
                     s.update(p1)
                 else:
                     # none of the parent nodes are nullid
                     l = [p1, p2]
                     l.sort()
                     s = hashlib.sha1(l[0])
                     s.update(l[1])
                 s.update(text)
                 return s.digest()
             # index v0:
             #  4 bytes: offset
             #  4 bytes: compressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             # 20 bytes: parent 1 nodeid
             # 20 bytes: parent 2 nodeid
             # 20 bytes: nodeid
             indexformatv0 = ">4l20s20s20s"
             class revlogoldio(object):
                 def __init__(self):
                     self.size = struct.calcsize(indexformatv0)
                 def parseindex(self, data, inline):
                     s = self.size
                     index = []
                     nodemap = {nullid: nullrev}
                     n = off = 0
                     l = len(data)
                     while off + s <= l:
                         cur = data[off:off + s]
                         off += s
                         e = _unpack(indexformatv0, cur)
                         # transform to revlogv1 format
                         e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                               nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
                         index.append(e2)
                         nodemap[e[6]] = n
                         n += 1
                     # add the magic null revision at -1
                     index.append((0, 0, 0, -1, -1, -1, -1, nullid))
                     return index, nodemap, None
                 def packentry(self, entry, node, version, rev):
                     if gettype(entry[0]):
                         raise RevlogError(_("index entry flags need RevlogNG"))
                     e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                           node(entry[5]), node(entry[6]), entry[7])
                     return _pack(indexformatv0, *e2)
             # index ng:
             #  6 bytes: offset
             #  2 bytes: flags
             #  4 bytes: compressed length
             #  4 bytes: uncompressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             #  4 bytes: parent 1 rev
             #  4 bytes: parent 2 rev
             # 32 bytes: nodeid
             indexformatng = ">Qiiiiii20s12x"
             versionformat = ">I"
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7fffffff
             class revlogio(object):
                 def __init__(self):
                     self.size = struct.calcsize(indexformatng)
                 def parseindex(self, data, inline):
                     # call the C implementation to parse the index data
                     index, cache = parsers.parse_index2(data, inline)
                     return index, getattr(index, 'nodemap', None), cache
                 def packentry(self, entry, node, version, rev):
                     p = _pack(indexformatng, *entry)
                     if rev == 0:
                         p = _pack(versionformat, version) + p[4:]
                     return p
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 """
                 def __init__(self, opener, indexfile, checkambig=False):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = indexfile[:-2] + ".d"
                     self.opener = opener
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._cache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, '')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._aggressivemergedeltas = False
                     self.index = []
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Mapping of revision integer to full node.
                     self._nodecache = {nullid: nullrev}
                     self._nodepos = None
                     self._compengine = 'zlib'
                     v = REVLOG_DEFAULT_VERSION
                     opts = getattr(opener, 'options', None)
                     if opts is not None:
                         if 'revlogv1' in opts:
                             if 'generaldelta' in opts:
                                 v |= REVLOGGENERALDELTA
                         else:
                             v = 0
                         if 'chunkcachesize' in opts:
                             self._chunkcachesize = opts['chunkcachesize']
                         if 'maxchainlen' in opts:
                             self._maxchainlen = opts['maxchainlen']
                         if 'aggressivemergedeltas' in opts:
                             self._aggressivemergedeltas = opts['aggressivemergedeltas']
                         self._lazydeltabase = bool(opts.get('lazydeltabase', False))
                         if 'compengine' in opts:
                             self._compengine = opts['compengine']
                     if self._chunkcachesize <= 0:
                         raise RevlogError(_('revlog chunk cache size %r is not greater '
                                             'than 0') % self._chunkcachesize)
                     elif self._chunkcachesize & (self._chunkcachesize - 1):
                         raise RevlogError(_('revlog chunk cache size %r is not a power '
                                             'of 2') % self._chunkcachesize)
                     indexdata = ''
                     self._initempty = True
                     try:
                         f = self.opener(self.indexfile)
                         indexdata = f.read()
                         f.close()
                         if len(indexdata) > 0:
                             v = struct.unpack(versionformat, indexdata[:4])[0]
                             self._initempty = False
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                     self.version = v
                     self._inline = v & REVLOGNGINLINEDATA
                     self._generaldelta = v & REVLOGGENERALDELTA
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0 and flags:
                         raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                           % (self.indexfile, flags >> 16))
                     elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
                         raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                           % (self.indexfile, flags >> 16))
                     elif fmt > REVLOGNG:
                         raise RevlogError(_("index %s unknown format %d")
                                           % (self.indexfile, fmt))
                     self.storedeltachains = True
                     self._io = revlogio()
                     if self.version == REVLOGV0:
                         self._io = revlogoldio()
                     try:
                         d = self._io.parseindex(indexdata, self._inline)
                     except (ValueError, IndexError):
                         raise RevlogError(_("index %s is corrupted") % (self.indexfile))
                     self.index, nodemap, self._chunkcache = d
                     if nodemap is not None:
                         self.nodemap = self._nodecache = nodemap
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = {}
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def _compressor(self):
                     return util.compengines[self._compengine].revlogcompressor()
                 def tip(self):
                     return self.node(len(self.index) - 2)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index) - 1
                 def __iter__(self):
                     return iter(xrange(len(self)))
                 def revs(self, start=0, stop=None):
                     """iterate over all rev in this revlog (from start to stop)"""
                     step = 1
                     if stop is not None:
                         if start > stop:
                             step = -1
                         stop += step
                     else:
                         stop = len(self)
                     return xrange(start, stop, step)
                 @util.propertycache
                 def nodemap(self):
                     self.rev(self.node(0))
                     return self._nodecache
                 def hasnode(self, node):
                     try:
                         self.rev(node)
                         return True
                     except KeyError:
                         return False
                 def clearcaches(self):
                     self._cache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, '')
                     self._pcache = {}
                     try:
                         self._nodecache.clearcaches()
                     except AttributeError:
                         self._nodecache = {nullid: nullrev}
                         self._nodepos = None
                 def rev(self, node):
                     try:
                         return self._nodecache[node]
                     except TypeError:
                         raise
                     except RevlogError:
                         # parsers.c radix tree lookup failed
                         raise LookupError(node, self.indexfile, _('no node'))
                     except KeyError:
                         # pure python cache lookup failed
                         n = self._nodecache
                         i = self.index
                         p = self._nodepos
                         if p is None:
                             p = len(i) - 2
                         for r in xrange(p, -1, -1):
                             v = i[r][7]
                             n[v] = r
                             if v == node:
                                 self._nodepos = r - 1
                                 return r
                         raise LookupError(node, self.indexfile, _('no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     return int(self.index[rev][0] >> 16)
                 def flags(self, rev):
                     return self.index[rev][0] & 0xFFFF
                 def length(self, rev):
                     return self.index[rev][1]
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(self.node(rev))
                     return len(t)
                 size = rawsize
                 def chainbase(self, rev):
                     base = self._chainbasecache.get(rev)
                     if base is not None:
                         return base
                     index = self.index
                     base = index[rev][3]
                     while base != rev:
                         rev = base
                         base = index[rev][3]
                     self._chainbasecache[rev] = base
                     return base
                 def linkrev(self, rev):
                     return self.index[rev][4]
                 def parentrevs(self, rev):
                     return self.index[rev][5:7]
                 def node(self, rev):
                     return self.index[rev][7]
                 # Derived from index values.
                 def end(self, rev):
                     return self.start(rev) + self.length(rev)
                 def parents(self, node):
                     i = self.index
                     d = i[self.rev(node)]
                     return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
                 def chainlen(self, rev):
                     return self._chaininfo(rev)[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse topological order.
                     Does not generate revs lower than stoprev.
                     See the documentation for ancestor.lazyancestors for more details."""
                     return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                                   inclusive=inclusive)
                 def descendants(self, revs):
                     """Generate the descendants of 'revs' in revision order.
                     Yield a sequence of revision numbers starting with a child of
                     some rev in revs, i.e., each revision is *not* considered a
                     descendant of itself.  Results are ordered by revision number (a
                     topological sort)."""
                     first = min(revs)
                     if first == nullrev:
                         for i in self:
                             yield i
                         return
                     seen = set(revs)
                     for i in self.revs(start=first + 1):
                         for x in self.parentrevs(i):
                             if x != nullrev and x in seen:
                                 seen.add(i)
                                 yield i
                                 break
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:
                       ::common, (::heads) - (::common)
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues
                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues
                         def __iter__(self):
                             added = self.addedvalues
                             for r in added:
                                 yield r
                             for r in self.lazyvalues:
                                 if not r in added:
                                     yield r
                         def add(self, value):
                             self.addedvalues.add(value)
                         def update(self, values):
                             self.addedvalues.update(values)
                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     visit = collections.deque(r for r in heads if r not in has)
                     while visit:
                         r = visit.popleft()
                         if r in missing:
                             continue
                         else:
                             missing.add(r)
                             for p in self.parentrevs(r):
                                 if p not in has:
                                     visit.append(p)
                     missing = list(missing)
                     missing.sort()
                     return has, [self.node(miss) for miss in missing]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object.
                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     if common is None:
                         common = [nullrev]
                     return ancestor.incrementalmissingancestors(self.parentrevs, common)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.
                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullrev]
                     if heads is None:
                         heads = self.headrevs()
                     inc = self.incrementalmissingrevs(common=common)
                     return inc.missingancestors(heads)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.
                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     inc = self.incrementalmissingrevs(common=common)
                     return [self.node(r) for r in inc.missingancestors(heads)]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.
                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:
 . N is a descendant of some node in 'roots'
 . N is an ancestor of some node in 'heads'
                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.
                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.
                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in self], [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n) # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [head for head, flag in heads.iteritems() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self):
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         return self._headrevs()
                 def computephases(self, roots):
                     return self.index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     if stop is specified, it will consider all the revs from stop
                     as if they had no children
                     """
                     if start is None and stop is None:
                         if not len(self):
                             return [nullid]
                         return [self.node(r) for r in self.headrevs()]
                     if start is None:
                         start = nullid
                     if stop is None:
                         stop = []
                     stoprevs = set([self.rev(n) for n in stop])
                     startrev = self.rev(start)
                     reachable = set((startrev,))
                     heads = set((startrev,))
                     parentrevs = self.parentrevs
                     for r in self.revs(start=startrev + 1):
                         for p in parentrevs(r):
                             if p in reachable:
                                 if r not in stoprevs:
                                     reachable.add(r)
                                 heads.add(r)
                             if p in heads and p not in stoprevs:
                                 heads.remove(p)
                     return [self.node(r) for r in heads]
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in self.revs(start=p + 1):
                         prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
                         if prevs:
                             for pr in prevs:
                                 if pr == p:
                                     c.append(self.node(r))
                         elif p == nullrev:
                             c.append(self.node(r))
                     return c
                 def descendant(self, start, end):
                     if start == nullrev:
                         return True
                     for i in self.descendants([start]):
                         if i == end:
                             return True
                         elif i > end:
                             break
                     return False
                 def commonancestorsheads(self, a, b):
                     """calculate all the heads of the common ancestors of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.commonancestorsheads(a, b)
                     except (AttributeError, OverflowError): # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
                     return map(self.node, ancs)
                 def isancestor(self, a, b):
                     """return True if node a is an ancestor of node b
                     The implementation of this is trivial but the use of
                     commonancestorsheads is not."""
                     return a in self.commonancestorsheads(a, b)
                 def ancestor(self, a, b):
                     """calculate the "best" common ancestor of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.ancestors(a, b)
                     except (AttributeError, OverflowError):
                         ancs = ancestor.ancestors(self.parentrevs, a, b)
                     if ancs:
                         # choose a consistent winner when there's a tie
                         return min(map(self.node, ancs))
                     return nullid
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == 20:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node) # quick search the index
                             return node
                         except LookupError:
                             pass # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if str(rev) != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 40:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, LookupError):
                             pass
                 def _partialmatch(self, id):
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             return partial
                         return None
                     except RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             raise LookupError(id, self.indexfile,
                                               _('ambiguous identifier'))
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) < 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[:l * 2])
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             nl = [n for n in nl if hex(n).startswith(id) and
                                   self.hasnode(n)]
                             if len(nl) > 0:
                                 if len(nl) == 1:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise LookupError(id, self.indexfile,
                                                   _('ambiguous identifier'))
                             return None
                         except TypeError:
                             pass
                 def lookup(self, id):
                     """locate a node based on:
                         - revision number or str(revision number)
                         - nodeid or subset of hex nodeid
                     """
                     n = self._match(id)
                     if n is not None:
                         return n
                     n = self._partialmatch(id)
                     if n:
                         return n
                     raise LookupError(id, self.indexfile, _('no match found'))
                 def cmp(self, node, text):
                     """compare text with a given file revision
                     returns True if text is different than what is stored.
                     """
                     p1, p2 = self.parents(node)
                     return hash(text, p1, p2) != node
                 def _addchunk(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _loadchunk(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     """
                     if df is not None:
                         closehandle = False
                     else:
                         if self._inline:
                             df = self.opener(self.indexfile)
                         else:
                             df = self.opener(self.datafile)
                         closehandle = True
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                                   - realoffset)
                     df.seek(realoffset)
                     d = df.read(reallength)
                     if closehandle:
                         df.close()
                     self._addchunk(realoffset, d)
                     if offset != realoffset or reallength != length:
                         return util.buffer(d, offset - realoffset, length)
                     return d
                 def _getchunk(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._loadchunk(offset, length, df=df)
                 def _chunkraw(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self._io.size
                         end += (endrev + 1) * self._io.size
                     length = end - start
                     return start, self._getchunk(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     """
                     return self.decompress(self._chunkraw(rev, rev, df=df)[1])
                 def _chunks(self, revs, df=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     """
                     if not revs:
                         return []
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self._io.size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     try:
                         offset, data = self._chunkraw(revs[0], revs[-1], df=df)
                     except OverflowError:
                         # issue4215 - we can't cache a run of chunks greater than
                         # 2G on Windows
                         return [self._chunk(rev, df=df) for rev in revs]
                     decomp = self.decompress
                     for rev in revs:
                         chunkstart = start(rev)
                         if inline:
                             chunkstart += (rev + 1) * iosize
                         chunklength = length(rev)
                         ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, '')
                 def deltaparent(self, rev):
                     """return deltaparent of the given revision"""
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     elif self._generaldelta:
                         return base
                     else:
                         return rev - 1
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions"""
                     if rev1 != nullrev and self.deltaparent(rev2) == rev1:
                         return str(self._chunk(rev2))
                     return mdiff.textdiff(self.revision(rev1),
                                           self.revision(rev2))
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return an uncompressed revision of a given node or revision
                     number.
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.
                     """
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     cachedrev = None
                     if node == nullid:
                         return ""
                     if self._cache:
                         if self._cache[0] == node:
                             return self._cache[2]
                         cachedrev = self._cache[1]
                     # look up what we need to read
                     text = None
                     if rev is None:
                         rev = self.rev(node)
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         text = self._cache[2]
                     # drop cache to save memory
                     self._cache = None
                     bins = self._chunks(chain, df=_df)
                     if text is None:
                         text = str(bins[0])
                         bins = bins[1:]
                     text = mdiff.patches(text, bins)
                     text, validatehash = self._processflags(text, self.flags(rev), 'read',
                                                             raw=raw)
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     self._cache = (node, rev, text)
                     return text
                 def hash(self, text, p1, p2):
                     """Compute a node hash.
                     Available as a function so that subclasses can replace the hash
                     as needed.
                     """
                     return hash(text, p1, p2)
                 def _processflags(self, text, flags, operation, raw=False):
                     """Inspect revision data flags and applies transforms defined by
                     registered flag processors.
                     ``text`` - the revision data to process
                     ``flags`` - the revision flags
                     ``operation`` - the operation being performed (read or write)
                     ``raw`` - an optional argument describing if the raw transform should be
                     applied.
                     This method processes the flags in the order (or reverse order if
                     ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
                     flag processors registered for present flags. The order of flags defined
                     in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
                     Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                     processed text and ``validatehash`` is a bool indicating whether the
                     returned text should be checked for hash integrity.
                     Note: If the ``raw`` argument is set, it has precedence over the
                     operation and will only update the value of ``validatehash``.
                     """
                     if not operation in ('read', 'write'):
                         raise ProgrammingError(_("invalid '%s' operation ") % (operation))
                     # Check all flags are known.
                     if flags & ~REVIDX_KNOWN_FLAGS:
                         raise RevlogError(_("incompatible revision flag '%#x'") %
                                           (flags & ~REVIDX_KNOWN_FLAGS))
                     validatehash = True
                     # Depending on the operation (read or write), the order might be
                     # reversed due to non-commutative transforms.
                     orderedflags = REVIDX_FLAGS_ORDER
                     if operation == 'write':
                         orderedflags = reversed(orderedflags)
                     for flag in orderedflags:
                         # If a flagprocessor has been registered for a known flag, apply the
                         # related operation transform and update result tuple.
                         if flag & flags:
                             vhash = True
                             if flag not in _flagprocessors:
                                 message = _("missing processor for flag '%#x'") % (flag)
                                 raise RevlogError(message)
                             processor = _flagprocessors[flag]
                             if processor is not None:
                                 readtransform, writetransform, rawtransform = processor
                                 if raw:
                                     vhash = rawtransform(self, text)
                                 elif operation == 'read':
                                     text, vhash = readtransform(self, text)
                                 else: # write operation
                                     text, vhash = writetransform(self, text)
                             validatehash = validatehash and vhash
                     return text, validatehash
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Check node hash integrity.
                     Available as a function so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     if p1 is None and p2 is None:
                         p1, p2 = self.parents(node)
                     if node != self.hash(text, p1, p2):
                         revornode = rev
                         if revornode is None:
                             revornode = templatefilters.short(hex(node))
                         raise RevlogError(_("integrity check failed on %s:%s")
                             % (self.indexfile, revornode))
                 def checkinlinesize(self, tr, fp=None):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     """
                     if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         raise RevlogError(_("%s not found in the transaction")
                                           % self.indexfile)
                     trindex = trinfo[2]
                     if trindex is not None:
                         dataoff = self.start(trindex)
                     else:
                         # revlog was stripped at start of transaction, use all leftover data
                         trindex = len(self) - 1
                         dataoff = self.end(-2)
                     tr.add(self.datafile, dataoff)
                     if fp:
                         fp.flush()
                         fp.close()
                     df = self.opener(self.datafile, 'w')
                     try:
                         for r in self:
                             df.write(self._chunkraw(r, r)[1])
                     finally:
                         df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True,
                                      checkambig=self._checkambig)
                     self.version &= ~(REVLOGNGINLINEDATA)
                     self._inline = False
                     for i in self:
                         e = self._io.packentry(self.index[i], self.node, self.version, i)
                         fp.write(e)
                     # if we don't call close, the temp file will never replace the
                     # real index
                     fp.close()
                     tr.replace(self.indexfile, trindex * self._io.size)
                     self._chunkclear()
                 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                                 node=None, flags=REVIDX_DEFAULT_FLAGS):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     """
                     if link == nullrev:
                         raise RevlogError(_("attempted to add linkrev -1 to %s")
                                           % self.indexfile)
                     if flags:
                         node = node or self.hash(text, p1, p2)
                     newtext, validatehash = self._processflags(text, flags, 'write')
                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if newtext != text:
                         cachedelta = None
                     text = newtext
                     if len(text) > _maxentrysize:
                         raise RevlogError(
                             _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                             % (self.indexfile, len(text)))
                     node = node or self.hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     if validatehash:
                         self.checkhash(text, node, p1=p1, p2=p2)
                     dfh = None
                     if not self._inline:
                         dfh = self.opener(self.datafile, "a+")
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     try:
                         return self._addrevision(node, text, transaction, link, p1, p2,
                                                  flags, cachedelta, ifh, dfh)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data."""
                     if not data:
                         return '', data
                     compressed = self._compressor.compress(data)
                     if compressed:
                         # The revlog compressor added the header in the returned data.
                         return '', compressed
                     if data[0] == '\0':
                         return '', data
                     return 'u', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.
                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     """
                     if not data:
                         return data
                     # Revlogs are read much more frequently than they are written and many
                     # chunks only take microseconds to decompress, so performance is
                     # important here.
                     #
                     # We can make a few assumptions about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to inline
                     #    raw data).
                     # 2) decompressing *any* data will likely by at least 10x slower than
                     #    returning raw inline data.
                     # 3) we want to prioritize common and officially supported compression
                     #    engines
                     #
                     # It follows that we want to optimize for "decompress compressed data
                     # when encoded with common and officially supported compression engines"
                     # case over "raw data" and "data encoded by less common or non-official
                     # compression engines." That is why we have the inline lookup first
                     # followed by the compengines lookup.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest reads.
                     t = data[0]
                     if t == 'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise RevlogError(_('revlog decompress error: %s') % str(e))
                     # '\0' is more common than 'u' so it goes first.
                     elif t == '\0':
                         return data
                     elif t == 'u':
                         return util.buffer(data, 1)
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor()
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise RevlogError(_('unknown compression type %r') % t)
                     return compressor.decompress(data)
                 def _isgooddelta(self, d, textlen):
                     """Returns True if the given delta is good. Good means that it is within
                     the disk span, disk size, and chain length bounds that we know to be
                     performant."""
                     if d is None:
                         return False
                     # - 'dist' is the distance from the base revision -- bounding it limits
                     #   the amount of I/O we need to do.
                     # - 'compresseddeltalen' is the sum of the total size of deltas we need
                     #   to apply -- bounding it limits the amount of CPU we consume.
                     dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
                     if (dist > textlen * 4 or l > textlen or
                         compresseddeltalen > textlen * 2 or
                         (self._maxchainlen and chainlen > self._maxchainlen)):
                         return False
                     return True
                 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                                  cachedelta, ifh, dfh, alwayscache=False, raw=False):
                     """internal function to add revisions to the log
                     see addrevision for argument descriptions.
                     invariants:
                     - text is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     - raw is optional; if set to True, it indicates the revision data is to
                       be treated by _processflags() as raw. It is usually set by changegroup
                       generation and debug commands.
                     """
                     btext = [text]
                     def buildtext():
                         if btext[0] is not None:
                             return btext[0]
                         baserev = cachedelta[0]
                         delta = cachedelta[1]
                         # special case deltas which replace entire base; no need to decode
                         # base revision. this neatly avoids censored bases, which throw when
                         # they're decoded.
                         hlen = struct.calcsize(">lll")
                         if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                                    len(delta) - hlen):
                             btext[0] = delta[hlen:]
                         else:
                             if self._inline:
                                 fh = ifh
                             else:
                                 fh = dfh
                             basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                             btext[0] = mdiff.patch(basetext, delta)
                         try:
                             res = self._processflags(btext[0], flags, 'read', raw=raw)
                             btext[0], validatehash = res
                             if validatehash:
                                 self.checkhash(btext[0], node, p1=p1, p2=p2)
                             if flags & REVIDX_ISCENSORED:
                                 raise RevlogError(_('node %s is not censored') % node)
                         except CensoredNodeError:
                             # must pass the censored index flag to add censored revisions
                             if not flags & REVIDX_ISCENSORED:
                                 raise
                         return btext[0]
                     def builddelta(rev):
                         # can we use the cached delta?
                         if cachedelta and cachedelta[0] == rev:
                             delta = cachedelta[1]
                         else:
                             t = buildtext()
                             if self.iscensored(rev):
                                 # deltas based on a censored revision must replace the
                                 # full content in one patch, so delta works everywhere
                                 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                                 delta = header + t
                             else:
                                 if self._inline:
                                     fh = ifh
                                 else:
                                     fh = dfh
                                 ptext = self.revision(self.node(rev), _df=fh)
                                 delta = mdiff.textdiff(ptext, t)
                         header, data = self.compress(delta)
                         deltalen = len(header) + len(data)
                         chainbase = self.chainbase(rev)
                         dist = deltalen + offset - self.start(chainbase)
                         if self._generaldelta:
                             base = rev
                         else:
                             base = chainbase
                         chainlen, compresseddeltalen = self._chaininfo(rev)
                         chainlen += 1
                         compresseddeltalen += deltalen
                         return (dist, deltalen, (header, data), base,
                                 chainbase, chainlen, compresseddeltalen)
                     curr = len(self)
                     prev = curr - 1
                     offset = self.end(prev)
                     delta = None
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if text is None:
                         textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                                     cachedelta[1])
                     else:
                         textlen = len(text)
                     # should we try to build a delta?
                     if prev != nullrev and self.storedeltachains:
                         tested = set()
                         # This condition is true most of the time when processing
                         # changegroup data into a generaldelta repo. The only time it
                         # isn't true is if this is the first revision in a delta chain
                         # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                         if cachedelta and self._generaldelta and self._lazydeltabase:
                             # Assume what we received from the server is a good choice
                             # build delta will reuse the cache
                             candidatedelta = builddelta(cachedelta[0])
                             tested.add(cachedelta[0])
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                         if delta is None and self._generaldelta:
                             # exclude already lazy tested base if any
                             parents = [p for p in (p1r, p2r)
                                        if p != nullrev and p not in tested]
                             if parents and not self._aggressivemergedeltas:
                                 # Pick whichever parent is closer to us (to minimize the
                                 # chance of having to build a fulltext).
                                 parents = [max(parents)]
                             tested.update(parents)
                             pdeltas = []
                             for p in parents:
                                 pd = builddelta(p)
                                 if self._isgooddelta(pd, textlen):
                                     pdeltas.append(pd)
                             if pdeltas:
                                 delta = min(pdeltas, key=lambda x: x[1])
                         if delta is None and prev not in tested:
                             # other approach failed try against prev to hopefully save us a
                             # fulltext.
                             candidatedelta = builddelta(prev)
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                     if delta is not None:
                         dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
                     else:
                         text = buildtext()
                         data = self.compress(text)
                         l = len(data[1]) + len(data[0])
                         base = chainbase = curr
                     e = (offset_type(offset, flags), l, textlen,
                          base, link, p1r, p2r, node)
                     self.index.insert(-1, e)
                     self.nodemap[node] = curr
                     entry = self._io.packentry(e, self.node, self.version, curr)
                     self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
                     if alwayscache and text is None:
                         text = buildtext()
                     if type(text) == str: # only accept immutable objects
                         self._cache = (node, curr, text)
                     self._chainbasecache[curr] = chainbase
                     return node
                 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3.
                     ifh.seek(0, os.SEEK_END)
                     if dfh:
                         dfh.seek(0, os.SEEK_END)
                     curr = len(self) - 1
                     if not self._inline:
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         ifh.write(entry)
                     else:
                         offset += curr * self._io.size
                         transaction.add(self.indexfile, offset, curr)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         self.checkinlinesize(transaction, ifh)
                 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the node that was added.
                     """
                     # track the base of the current delta log
                     content = []
                     node = None
                     r = len(self)
                     end = 0
                     if r:
                         end = self.end(r - 1)
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     isize = r * self._io.size
                     if self._inline:
                         transaction.add(self.indexfile, end + isize, r)
                         dfh = None
                     else:
                         transaction.add(self.indexfile, isize, r)
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a+")
                     def flush():
                         if dfh:
                             dfh.flush()
                         ifh.flush()
                     try:
                         # loop through our set of deltas
                         chain = None
                         for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                             node = chunkdata['node']
                             p1 = chunkdata['p1']
                             p2 = chunkdata['p2']
                             cs = chunkdata['cs']
                             deltabase = chunkdata['deltabase']
                             delta = chunkdata['delta']
                             flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
                             content.append(node)
                             link = linkmapper(cs)
                             if node in self.nodemap:
                                 # this can happen if two branches make the same change
                                 chain = node
                                 continue
                             for p in (p1, p2):
                                 if p not in self.nodemap:
                                     raise LookupError(p, self.indexfile,
                                                       _('unknown parent'))
                             if deltabase not in self.nodemap:
                                 raise LookupError(deltabase, self.indexfile,
                                                   _('unknown delta base'))
                             baserev = self.rev(deltabase)
                             if baserev != nullrev and self.iscensored(baserev):
                                 # if base is censored, delta must be full replacement in a
                                 # single patch operation
                                 hlen = struct.calcsize(">lll")
                                 oldlen = self.rawsize(baserev)
                                 newlen = len(delta) - hlen
                                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                                     raise error.CensoredBaseError(self.indexfile,
                                                                   self.node(baserev))
                             if not flags and self._peek_iscensored(baserev, delta, flush):
                                 flags |= REVIDX_ISCENSORED
                             # We assume consumers of addrevisioncb will want to retrieve
                             # the added revision, which will require a call to
                             # revision(). revision() will fast path if there is a cache
                             # hit. So, we tell _addrevision() to always cache in this case.
                             # We're only using addgroup() in the context of changegroup
                             # generation so the revision data can always be handled as raw
                             # by the flagprocessor.
                             chain = self._addrevision(node, None, transaction, link,
                                                       p1, p2, flags, (baserev, delta),
                                                       ifh, dfh,
                                                       alwayscache=bool(addrevisioncb),
                                                       raw=True)
                             if addrevisioncb:
                                 addrevisioncb(self, chain)
                             if not dfh and not self._inline:
                                 # addrevision switched from inline to conventional
                                 # reopen the index
                                 ifh.close()
                                 dfh = self.opener(self.datafile, "a+")
                                 ifh = self.opener(self.indexfile, "a+",
                                                   checkambig=self._checkambig)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                     return content
                 def iscensored(self, rev):
                     """Check if a file revision is censored."""
                     return False
                 def _peek_iscensored(self, baserev, delta, flush):
                     """Quickly check if a delta produces a censored revision."""
                     return False
                 def getstrippoint(self, minlink):
                     """find the minimum rev that must be stripped to strip the linkrev
                     Returns a tuple containing the minimum rev and a set of all revs that
                     have linkrevs that will be broken by this strip.
                     """
                     brokenrevs = set()
                     strippoint = len(self)
                     heads = {}
                     futurelargelinkrevs = set()
                     for head in self.headrevs():
                         headlinkrev = self.linkrev(head)
                         heads[head] = headlinkrev
                         if headlinkrev >= minlink:
                             futurelargelinkrevs.add(headlinkrev)
                     # This algorithm involves walking down the rev graph, starting at the
                     # heads. Since the revs are topologically sorted according to linkrev,
                     # once all head linkrevs are below the minlink, we know there are
                     # no more revs that could have a linkrev greater than minlink.
                     # So we can stop walking.
                     while futurelargelinkrevs:
                         strippoint -= 1
                         linkrev = heads.pop(strippoint)
                         if linkrev < minlink:
                             brokenrevs.add(strippoint)
                         else:
                             futurelargelinkrevs.remove(linkrev)
                         for p in self.parentrevs(strippoint):
                             if p != nullrev:
                                 plinkrev = self.linkrev(p)
                                 heads[p] = plinkrev
                                 if plinkrev >= minlink:
                                     futurelargelinkrevs.add(plinkrev)
                     return strippoint, brokenrevs
                 def strip(self, minlink, transaction):
                     """truncate the revlog on the first revision with a linkrev >= minlink
                     This function is called when we're stripping revision minlink and
                     its descendants from the repository.
                     We have to remove all revisions with linkrev >= minlink, because
                     the equivalent changelog revisions will be renumbered after the
                     strip.
                     So we truncate the revlog on the first of these revisions, and
                     trust that the caller has saved the revisions that shouldn't be
                     removed and that it'll re-add them after this truncation.
                     """
                     if len(self) == 0:
                         return
                     rev, _ = self.getstrippoint(minlink)
                     if rev == len(self):
                         return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self._inline:
                         transaction.add(self.datafile, end)
                         end = rev * self._io.size
                     else:
                         end += rev * self._io.size
                     transaction.add(self.indexfile, end)
                     # then reset internal state in memory to forget those revisions
                     self._cache = None
                     self._chaininfocache = {}
                     self._chunkclear()
                     for x in xrange(rev, len(self)):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:-1]
                 def checksize(self):
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         s = self._io.size
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     res = [self.indexfile]
                     if not self._inline:
                         res.append(self.datafile)
                     return res
                 DELTAREUSEALWAYS = 'always'
                 DELTAREUSESAMEREVS = 'samerevs'
                 DELTAREUSENEVER = 'never'
                 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
                 def clone(self, tr, destrevlog, addrevisioncb=None,
                           deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``aggressivemergedeltas`` argument
                     controls whether to compute deltas against both parents for merges.
                     By default, the current default is used.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
                     if len(destrevlog):
                         raise ValueError(_('destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_('source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_('destination revlog has filtered revisions'))
                     # lazydeltabase controls whether to reuse a cached delta, if possible.
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._aggressivemergedeltas
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                         destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
                         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                             self.DELTAREUSESAMEREVS)
                         index = self.index
                         for rev in self:
                             entry = index[rev]
                             # Some classes override linkrev to take filtered revs into
                             # account. Use raw entry from index.
                             flags = entry[0] & 0xffff
                             linkrev = entry[4]
                             p1 = index[entry[5]][7]
                             p2 = index[entry[6]][7]
                             node = entry[7]
                             # (Possibly) reuse the delta from the revlog if allowed and
                             # the revlog chunk is a delta.
                             cachedelta = None
                             text = None
                             if populatecachedelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, str(self._chunk(rev)))
                             if not cachedelta:
                                 text = self.revision(rev)
                             ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                                     checkambig=False)
                             dfh = None
                             if not destrevlog._inline:
                                 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                             try:
                                 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
                                                         flags, cachedelta, ifh, dfh)
                             finally:
                                 if dfh:
                                     dfh.close()
                                 ifh.close()
                             if addrevisioncb:
                                 addrevisioncb(self, rev, node)
                     finally:
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._aggressivemergedeltas = oldamd