upstream/mercurial-mirror Commit - r31357:c233cbda

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import collections

16

import collections

17

import errno

17

import errno

18

import hashlib

18

import hashlib

19

import os

19

import os

20

import struct

20

import struct

21

import zlib

21

import zlib

22

23

# import stuff from node for others to import from revlog

23

# import stuff from node for others to import from revlog

24

from .node import (

24

from .node import (

25

bin,

25

bin,

26

hex,

26

hex,

27

nullid,

27

nullid,

28

nullrev,

28

nullrev,

29

)

29

)

30

from .i18n import _

30

from .i18n import _

31

from . import (

31

from . import (

32

ancestor,

32

ancestor,

33

error,

33

error,

34

mdiff,

34

mdiff,

35

parsers,

35

parsers,

36

templatefilters,

36

templatefilters,

37

util,

37

util,

38

)

38

)

39

40

_pack = struct.pack

40

_pack = struct.pack

41

_unpack = struct.unpack

41

_unpack = struct.unpack

42

# Aliased for performance.

42

# Aliased for performance.

43

_zlibdecompress = zlib.decompress

43

_zlibdecompress = zlib.decompress

44

45

# revlog header flags

45

# revlog header flags

46

REVLOGV0 = 0

46

REVLOGV0 = 0

47

REVLOGNG = 1

47

REVLOGNG = 1

48

REVLOGNGINLINEDATA = (1 << 16)

48

REVLOGNGINLINEDATA = (1 << 16)

49

REVLOGGENERALDELTA = (1 << 17)

49

REVLOGGENERALDELTA = (1 << 17)

50

REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

50

REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA

51

REVLOG_DEFAULT_FORMAT = REVLOGNG

51

REVLOG_DEFAULT_FORMAT = REVLOGNG

52

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

52

REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

53

REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

53

REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

54

55

# revlog index flags

55

# revlog index flags

56

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

56

REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified

57

REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)

57

REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)

58

REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally

58

REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally

59

REVIDX_DEFAULT_FLAGS = 0

59

REVIDX_DEFAULT_FLAGS = 0

60

# stable order in which flags need to be processed and their processors applied

60

# stable order in which flags need to be processed and their processors applied

61

REVIDX_FLAGS_ORDER = [

61

REVIDX_FLAGS_ORDER = [

62

REVIDX_ISCENSORED,

62

REVIDX_ISCENSORED,

63

REVIDX_ELLIPSIS,

63

REVIDX_ELLIPSIS,

64

REVIDX_EXTSTORED,

64

REVIDX_EXTSTORED,

65

]

65

]

66

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

66

REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

67

68

# max size of revlog with inline data

68

# max size of revlog with inline data

69

_maxinline = 131072

69

_maxinline = 131072

70

_chunksize = 1048576

70

_chunksize = 1048576

71

72

RevlogError = error.RevlogError

72

RevlogError = error.RevlogError

73

LookupError = error.LookupError

73

LookupError = error.LookupError

74

CensoredNodeError = error.CensoredNodeError

74

CensoredNodeError = error.CensoredNodeError

75

ProgrammingError = error.ProgrammingError

75

ProgrammingError = error.ProgrammingError

76

77

# Store flag processors (cf. 'addflagprocessor()' to register)

77

# Store flag processors (cf. 'addflagprocessor()' to register)

78

_flagprocessors = {

78

_flagprocessors = {

79

REVIDX_ISCENSORED: None,

79

REVIDX_ISCENSORED: None,

80

}

80

}

81

82

def addflagprocessor(flag, processor):

82

def addflagprocessor(flag, processor):

83

"""Register a flag processor on a revision data flag.

83

"""Register a flag processor on a revision data flag.

84

85

Invariant:

85

Invariant:

86

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

86

- Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.

87

- Only one flag processor can be registered on a specific flag.

87

- Only one flag processor can be registered on a specific flag.

88

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

88

- flagprocessors must be 3-tuples of functions (read, write, raw) with the

89

following signatures:

89

following signatures:

90

- (read) f(self, text) -> newtext, bool

90

- (read) f(self, text) -> newtext, bool

91

- (write) f(self, text) -> newtext, bool

91

- (write) f(self, text) -> newtext, bool

92

- (raw) f(self, text) -> bool

92

- (raw) f(self, text) -> bool

93

The boolean returned by these transforms is used to determine whether

93

The boolean returned by these transforms is used to determine whether

94

'newtext' can be used for hash integrity checking.

94

'newtext' can be used for hash integrity checking.

95

96

Note: The 'raw' transform is used for changegroup generation and in some

96

Note: The 'raw' transform is used for changegroup generation and in some

97

debug commands. In this case the transform only indicates whether the

97

debug commands. In this case the transform only indicates whether the

98

contents can be used for hash integrity checks.

98

contents can be used for hash integrity checks.

99

"""

99

"""

100

if not flag & REVIDX_KNOWN_FLAGS:

100

if not flag & REVIDX_KNOWN_FLAGS:

101

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

101

msg = _("cannot register processor on unknown flag '%#x'.") % (flag)

102

raise ProgrammingError(msg)

102

raise ProgrammingError(msg)

103

if flag not in REVIDX_FLAGS_ORDER:

103

if flag not in REVIDX_FLAGS_ORDER:

104

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

104

msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)

105

raise ProgrammingError(msg)

105

raise ProgrammingError(msg)

106

if flag in _flagprocessors:

106

if flag in _flagprocessors:

107

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

107

msg = _("cannot register multiple processors on flag '%#x'.") % (flag)

108

raise error.Abort(msg)

108

raise error.Abort(msg)

109

_flagprocessors[flag] = processor

109

_flagprocessors[flag] = processor

110

111

def getoffset(q):

111

def getoffset(q):

112

return int(q >> 16)

112

return int(q >> 16)

113

114

def gettype(q):

114

def gettype(q):

115

return int(q & 0xFFFF)

115

return int(q & 0xFFFF)

116

117

def offset_type(offset, type):

117

def offset_type(offset, type):

118

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

118

if (type & ~REVIDX_KNOWN_FLAGS) != 0:

119

raise ValueError('unknown revlog index flags')

119

raise ValueError('unknown revlog index flags')

120

return long(long(offset) << 16 | type)

120

return long(long(offset) << 16 | type)

121

122

_nullhash = hashlib.sha1(nullid)

122

_nullhash = hashlib.sha1(nullid)

123

124

def hash(text, p1, p2):

124

def hash(text, p1, p2):

125

"""generate a hash from the given text and its parent hashes

125

"""generate a hash from the given text and its parent hashes

126

127

This hash combines both the current file contents and its history

127

This hash combines both the current file contents and its history

128

in a manner that makes it easy to distinguish nodes with the same

128

in a manner that makes it easy to distinguish nodes with the same

129

content in the revision graph.

129

content in the revision graph.

130

"""

130

"""

131

# As of now, if one of the parent node is null, p2 is null

131

# As of now, if one of the parent node is null, p2 is null

132

if p2 == nullid:

132

if p2 == nullid:

133

# deep copy of a hash is faster than creating one

133

# deep copy of a hash is faster than creating one

134

s = _nullhash.copy()

134

s = _nullhash.copy()

135

s.update(p1)

135

s.update(p1)

136

else:

136

else:

137

# none of the parent nodes are nullid

137

# none of the parent nodes are nullid

138

l = [p1, p2]

138

l = [p1, p2]

139

l.sort()

139

l.sort()

140

s = hashlib.sha1(l[0])

140

s = hashlib.sha1(l[0])

141

s.update(l[1])

141

s.update(l[1])

142

s.update(text)

142

s.update(text)

143

return s.digest()

143

return s.digest()

144

145

# index v0:

145

# index v0:

146

# 4 bytes: offset

146

# 4 bytes: offset

147

# 4 bytes: compressed length

147

# 4 bytes: compressed length

148

# 4 bytes: base rev

148

# 4 bytes: base rev

149

# 4 bytes: link rev

149

# 4 bytes: link rev

150

# 20 bytes: parent 1 nodeid

150

# 20 bytes: parent 1 nodeid

151

# 20 bytes: parent 2 nodeid

151

# 20 bytes: parent 2 nodeid

152

# 20 bytes: nodeid

152

# 20 bytes: nodeid

153

indexformatv0 = ">4l20s20s20s"

153

indexformatv0 = ">4l20s20s20s"

154

155

class revlogoldio(object):

155

class revlogoldio(object):

156

def __init__(self):

156

def __init__(self):

157

self.size = struct.calcsize(indexformatv0)

157

self.size = struct.calcsize(indexformatv0)

158

159

def parseindex(self, data, inline):

159

def parseindex(self, data, inline):

160

s = self.size

160

s = self.size

161

index = []

161

index = []

162

nodemap = {nullid: nullrev}

162

nodemap = {nullid: nullrev}

163

n = off = 0

163

n = off = 0

164

l = len(data)

164

l = len(data)

165

while off + s <= l:

165

while off + s <= l:

166

cur = data[off:off + s]

166

cur = data[off:off + s]

167

off += s

167

off += s

168

e = _unpack(indexformatv0, cur)

168

e = _unpack(indexformatv0, cur)

169

# transform to revlogv1 format

169

# transform to revlogv1 format

170

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

170

e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],

171

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

171

nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])

172

index.append(e2)

172

index.append(e2)

173

nodemap[e[6]] = n

173

nodemap[e[6]] = n

174

n += 1

174

n += 1

175

176

# add the magic null revision at -1

176

# add the magic null revision at -1

177

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

177

index.append((0, 0, 0, -1, -1, -1, -1, nullid))

178

179

return index, nodemap, None

179

return index, nodemap, None

180

181

def packentry(self, entry, node, version, rev):

181

def packentry(self, entry, node, version, rev):

182

if gettype(entry[0]):

182

if gettype(entry[0]):

183

raise RevlogError(_("index entry flags need RevlogNG"))

183

raise RevlogError(_("index entry flags need RevlogNG"))

184

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

184

e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],

185

node(entry[5]), node(entry[6]), entry[7])

185

node(entry[5]), node(entry[6]), entry[7])

186

return _pack(indexformatv0, *e2)

186

return _pack(indexformatv0, *e2)

187

188

# index ng:

188

# index ng:

189

# 6 bytes: offset

189

# 6 bytes: offset

190

# 2 bytes: flags

190

# 2 bytes: flags

191

# 4 bytes: compressed length

191

# 4 bytes: compressed length

192

# 4 bytes: uncompressed length

192

# 4 bytes: uncompressed length

193

# 4 bytes: base rev

193

# 4 bytes: base rev

194

# 4 bytes: link rev

194

# 4 bytes: link rev

195

# 4 bytes: parent 1 rev

195

# 4 bytes: parent 1 rev

196

# 4 bytes: parent 2 rev

196

# 4 bytes: parent 2 rev

197

# 32 bytes: nodeid

197

# 32 bytes: nodeid

198

indexformatng = ">Qiiiiii20s12x"

198

indexformatng = ">Qiiiiii20s12x"

199

versionformat = ">I"

199

versionformat = ">I"

200

201

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

201

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte

202

# signed integer)

202

# signed integer)

203

_maxentrysize = 0x7fffffff

203

_maxentrysize = 0x7fffffff

204

205

class revlogio(object):

205

class revlogio(object):

206

def __init__(self):

206

def __init__(self):

207

self.size = struct.calcsize(indexformatng)

207

self.size = struct.calcsize(indexformatng)

208

209

def parseindex(self, data, inline):

209

def parseindex(self, data, inline):

210

# call the C implementation to parse the index data

210

# call the C implementation to parse the index data

211

index, cache = parsers.parse_index2(data, inline)

211

index, cache = parsers.parse_index2(data, inline)

212

return index, getattr(index, 'nodemap', None), cache

212

return index, getattr(index, 'nodemap', None), cache

213

214

def packentry(self, entry, node, version, rev):

214

def packentry(self, entry, node, version, rev):

215

p = _pack(indexformatng, *entry)

215

p = _pack(indexformatng, *entry)

216

if rev == 0:

216

if rev == 0:

217

p = _pack(versionformat, version) + p[4:]

217

p = _pack(versionformat, version) + p[4:]

218

return p

218

return p

219

220

class revlog(object):

220

class revlog(object):

221

"""

221

"""

222

the underlying revision storage object

222

the underlying revision storage object

223

224

A revlog consists of two parts, an index and the revision data.

224

A revlog consists of two parts, an index and the revision data.

225

226

The index is a file with a fixed record size containing

226

The index is a file with a fixed record size containing

227

information on each revision, including its nodeid (hash), the

227

information on each revision, including its nodeid (hash), the

228

nodeids of its parents, the position and offset of its data within

228

nodeids of its parents, the position and offset of its data within

229

the data file, and the revision it's based on. Finally, each entry

229

the data file, and the revision it's based on. Finally, each entry

230

contains a linkrev entry that can serve as a pointer to external

230

contains a linkrev entry that can serve as a pointer to external

231

data.

231

data.

232

233

The revision data itself is a linear collection of data chunks.

233

The revision data itself is a linear collection of data chunks.

234

Each chunk represents a revision and is usually represented as a

234

Each chunk represents a revision and is usually represented as a

235

delta against the previous chunk. To bound lookup time, runs of

235

delta against the previous chunk. To bound lookup time, runs of

236

deltas are limited to about 2 times the length of the original

236

deltas are limited to about 2 times the length of the original

237

version data. This makes retrieval of a version proportional to

237

version data. This makes retrieval of a version proportional to

238

its size, or O(1) relative to the number of revisions.

238

its size, or O(1) relative to the number of revisions.

239

240

Both pieces of the revlog are written to in an append-only

240

Both pieces of the revlog are written to in an append-only

241

fashion, which means we never need to rewrite a file to insert or

241

fashion, which means we never need to rewrite a file to insert or

242

remove data, and can use some simple techniques to avoid the need

242

remove data, and can use some simple techniques to avoid the need

243

for locking while reading.

243

for locking while reading.

244

245

If checkambig, indexfile is opened with checkambig=True at

245

If checkambig, indexfile is opened with checkambig=True at

246

writing, to avoid file stat ambiguity.

246

writing, to avoid file stat ambiguity.

247

"""

247

"""

248

def __init__(self, opener, indexfile, checkambig=False):

248

def __init__(self, opener, indexfile, checkambig=False):

249

"""

249

"""

250

create a revlog object

250

create a revlog object

251

252

opener is a function that abstracts the file opening operation

252

opener is a function that abstracts the file opening operation

253

and can be used to implement COW semantics or the like.

253

and can be used to implement COW semantics or the like.

254

"""

254

"""

255

self.indexfile = indexfile

255

self.indexfile = indexfile

256

self.datafile = indexfile[:-2] + ".d"

256

self.datafile = indexfile[:-2] + ".d"

257

self.opener = opener

257

self.opener = opener

258

# When True, indexfile is opened with checkambig=True at writing, to

258

# When True, indexfile is opened with checkambig=True at writing, to

259

# avoid file stat ambiguity.

259

# avoid file stat ambiguity.

260

self._checkambig = checkambig

260

self._checkambig = checkambig

261

# 3-tuple of (node, rev, text) for a raw revision.

261

# 3-tuple of (node, rev, text) for a raw revision.

262

self._cache = None

262

self._cache = None

263

# Maps rev to chain base rev.

263

# Maps rev to chain base rev.

264

self._chainbasecache = util.lrucachedict(100)

264

self._chainbasecache = util.lrucachedict(100)

265

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

265

# 2-tuple of (offset, data) of raw data from the revlog at an offset.

266

self._chunkcache = (0, '')

266

self._chunkcache = (0, '')

267

# How much data to read and cache into the raw revlog data cache.

267

# How much data to read and cache into the raw revlog data cache.

268

self._chunkcachesize = 65536

268

self._chunkcachesize = 65536

269

self._maxchainlen = None

269

self._maxchainlen = None

270

self._aggressivemergedeltas = False

270

self._aggressivemergedeltas = False

271

self.index = []

271

self.index = []

272

# Mapping of partial identifiers to full nodes.

272

# Mapping of partial identifiers to full nodes.

273

self._pcache = {}

273

self._pcache = {}

274

# Mapping of revision integer to full node.

274

# Mapping of revision integer to full node.

275

self._nodecache = {nullid: nullrev}

275

self._nodecache = {nullid: nullrev}

276

self._nodepos = None

276

self._nodepos = None

277

self._compengine = 'zlib'

277

self._compengine = 'zlib'

278

279

v = REVLOG_DEFAULT_VERSION

279

v = REVLOG_DEFAULT_VERSION

280

opts = getattr(opener, 'options', None)

280

opts = getattr(opener, 'options', None)

281

if opts is not None:

281

if opts is not None:

282

if 'revlogv1' in opts:

282

if 'revlogv1' in opts:

283

if 'generaldelta' in opts:

283

if 'generaldelta' in opts:

284

v |= REVLOGGENERALDELTA

284

v |= REVLOGGENERALDELTA

285

else:

285

else:

286

v = 0

286

v = 0

287

if 'chunkcachesize' in opts:

287

if 'chunkcachesize' in opts:

288

self._chunkcachesize = opts['chunkcachesize']

288

self._chunkcachesize = opts['chunkcachesize']

289

if 'maxchainlen' in opts:

289

if 'maxchainlen' in opts:

290

self._maxchainlen = opts['maxchainlen']

290

self._maxchainlen = opts['maxchainlen']

291

if 'aggressivemergedeltas' in opts:

291

if 'aggressivemergedeltas' in opts:

292

self._aggressivemergedeltas = opts['aggressivemergedeltas']

292

self._aggressivemergedeltas = opts['aggressivemergedeltas']

293

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

293

self._lazydeltabase = bool(opts.get('lazydeltabase', False))

294

if 'compengine' in opts:

294

if 'compengine' in opts:

295

self._compengine = opts['compengine']

295

self._compengine = opts['compengine']

296

297

if self._chunkcachesize <= 0:

297

if self._chunkcachesize <= 0:

298

raise RevlogError(_('revlog chunk cache size %r is not greater '

298

raise RevlogError(_('revlog chunk cache size %r is not greater '

299

'than 0') % self._chunkcachesize)

299

'than 0') % self._chunkcachesize)

300

elif self._chunkcachesize & (self._chunkcachesize - 1):

300

elif self._chunkcachesize & (self._chunkcachesize - 1):

301

raise RevlogError(_('revlog chunk cache size %r is not a power '

301

raise RevlogError(_('revlog chunk cache size %r is not a power '

302

'of 2') % self._chunkcachesize)

302

'of 2') % self._chunkcachesize)

303

304

indexdata = ''

304

indexdata = ''

305

self._initempty = True

305

self._initempty = True

306

try:

306

try:

307

f = self.opener(self.indexfile)

307

f = self.opener(self.indexfile)

308

indexdata = f.read()

308

indexdata = f.read()

309

f.close()

309

f.close()

310

if len(indexdata) > 0:

310

if len(indexdata) > 0:

311

v = struct.unpack(versionformat, indexdata[:4])[0]

311

v = struct.unpack(versionformat, indexdata[:4])[0]

312

self._initempty = False

312

self._initempty = False

313

except IOError as inst:

313

except IOError as inst:

314

if inst.errno != errno.ENOENT:

314

if inst.errno != errno.ENOENT:

315

raise

315

raise

316

317

self.version = v

317

self.version = v

318

self._inline = v & REVLOGNGINLINEDATA

318

self._inline = v & REVLOGNGINLINEDATA

319

self._generaldelta = v & REVLOGGENERALDELTA

319

self._generaldelta = v & REVLOGGENERALDELTA

320

flags = v & ~0xFFFF

320

flags = v & ~0xFFFF

321

fmt = v & 0xFFFF

321

fmt = v & 0xFFFF

322

if fmt == REVLOGV0 and flags:

322

if fmt == REVLOGV0 and flags:

323

raise RevlogError(_("index %s unknown flags %#04x for format v0")

323

raise RevlogError(_("index %s unknown flags %#04x for format v0")

324

% (self.indexfile, flags >> 16))

324

% (self.indexfile, flags >> 16))

325

elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:

325

elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:

326

raise RevlogError(_("index %s unknown flags %#04x for revlogng")

326

raise RevlogError(_("index %s unknown flags %#04x for revlogng")

327

% (self.indexfile, flags >> 16))

327

% (self.indexfile, flags >> 16))

328

elif fmt > REVLOGNG:

328

elif fmt > REVLOGNG:

329

raise RevlogError(_("index %s unknown format %d")

329

raise RevlogError(_("index %s unknown format %d")

330

% (self.indexfile, fmt))

330

% (self.indexfile, fmt))

331

332

self.storedeltachains = True

332

self.storedeltachains = True

333

334

self._io = revlogio()

334

self._io = revlogio()

335

if self.version == REVLOGV0:

335

if self.version == REVLOGV0:

336

self._io = revlogoldio()

336

self._io = revlogoldio()

337

try:

337

try:

338

d = self._io.parseindex(indexdata, self._inline)

338

d = self._io.parseindex(indexdata, self._inline)

339

except (ValueError, IndexError):

339

except (ValueError, IndexError):

340

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

340

raise RevlogError(_("index %s is corrupted") % (self.indexfile))

341

self.index, nodemap, self._chunkcache = d

341

self.index, nodemap, self._chunkcache = d

342

if nodemap is not None:

342

if nodemap is not None:

343

self.nodemap = self._nodecache = nodemap

343

self.nodemap = self._nodecache = nodemap

344

if not self._chunkcache:

344

if not self._chunkcache:

345

self._chunkclear()

345

self._chunkclear()

346

# revnum -> (chain-length, sum-delta-length)

346

# revnum -> (chain-length, sum-delta-length)

347

self._chaininfocache = {}

347

self._chaininfocache = {}

348

# revlog header -> revlog compressor

348

# revlog header -> revlog compressor

349

self._decompressors = {}

349

self._decompressors = {}

350

351

@util.propertycache

351

@util.propertycache

352

def _compressor(self):

352

def _compressor(self):

353

return util.compengines[self._compengine].revlogcompressor()

353

return util.compengines[self._compengine].revlogcompressor()

354

355

def tip(self):

355

def tip(self):

356

return self.node(len(self.index) - 2)

356

return self.node(len(self.index) - 2)

357

def __contains__(self, rev):

357

def __contains__(self, rev):

358

return 0 <= rev < len(self)

358

return 0 <= rev < len(self)

359

def __len__(self):

359

def __len__(self):

360

return len(self.index) - 1

360

return len(self.index) - 1

361

def __iter__(self):

361

def __iter__(self):

362

return iter(xrange(len(self)))

362

return iter(xrange(len(self)))

363

def revs(self, start=0, stop=None):

363

def revs(self, start=0, stop=None):

364

"""iterate over all rev in this revlog (from start to stop)"""

364

"""iterate over all rev in this revlog (from start to stop)"""

365

step = 1

365

step = 1

366

if stop is not None:

366

if stop is not None:

367

if start > stop:

367

if start > stop:

368

step = -1

368

step = -1

369

stop += step

369

stop += step

370

else:

370

else:

371

stop = len(self)

371

stop = len(self)

372

return xrange(start, stop, step)

372

return xrange(start, stop, step)

373

374

@util.propertycache

374

@util.propertycache

375

def nodemap(self):

375

def nodemap(self):

376

self.rev(self.node(0))

376

self.rev(self.node(0))

377

return self._nodecache

377

return self._nodecache

378

379

def hasnode(self, node):

379

def hasnode(self, node):

380

try:

380

try:

381

self.rev(node)

381

self.rev(node)

382

return True

382

return True

383

except KeyError:

383

except KeyError:

384

return False

384

return False

385

386

def clearcaches(self):

386

def clearcaches(self):

387

self._cache = None

387

self._cache = None

388

self._chainbasecache.clear()

388

self._chainbasecache.clear()

389

self._chunkcache = (0, '')

389

self._chunkcache = (0, '')

390

self._pcache = {}

390

self._pcache = {}

391

392

try:

392

try:

393

self._nodecache.clearcaches()

393

self._nodecache.clearcaches()

394

except AttributeError:

394

except AttributeError:

395

self._nodecache = {nullid: nullrev}

395

self._nodecache = {nullid: nullrev}

396

self._nodepos = None

396

self._nodepos = None

397

398

def rev(self, node):

398

def rev(self, node):

399

try:

399

try:

400

return self._nodecache[node]

400

return self._nodecache[node]

401

except TypeError:

401

except TypeError:

402

raise

402

raise

403

except RevlogError:

403

except RevlogError:

404

# parsers.c radix tree lookup failed

404

# parsers.c radix tree lookup failed

405

raise LookupError(node, self.indexfile, _('no node'))

405

raise LookupError(node, self.indexfile, _('no node'))

406

except KeyError:

406

except KeyError:

407

# pure python cache lookup failed

407

# pure python cache lookup failed

408

n = self._nodecache

408

n = self._nodecache

409

i = self.index

409

i = self.index

410

p = self._nodepos

410

p = self._nodepos

411

if p is None:

411

if p is None:

412

p = len(i) - 2

412

p = len(i) - 2

413

for r in xrange(p, -1, -1):

413

for r in xrange(p, -1, -1):

414

v = i[r][7]

414

v = i[r][7]

415

n[v] = r

415

n[v] = r

416

if v == node:

416

if v == node:

417

self._nodepos = r - 1

417

self._nodepos = r - 1

418

return r

418

return r

419

raise LookupError(node, self.indexfile, _('no node'))

419

raise LookupError(node, self.indexfile, _('no node'))

420

421

# Accessors for index entries.

421

# Accessors for index entries.

422

423

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

423

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

424

# are flags.

424

# are flags.

425

def start(self, rev):

425

def start(self, rev):

426

return int(self.index[rev][0] >> 16)

426

return int(self.index[rev][0] >> 16)

427

428

def flags(self, rev):

428

def flags(self, rev):

429

return self.index[rev][0] & 0xFFFF

429

return self.index[rev][0] & 0xFFFF

430

431

def length(self, rev):

431

def length(self, rev):

432

return self.index[rev][1]

432

return self.index[rev][1]

433

434

def rawsize(self, rev):

434

def rawsize(self, rev):

435

"""return the length of the uncompressed text for a given revision"""

435

"""return the length of the uncompressed text for a given revision"""

436

l = self.index[rev][2]

436

l = self.index[rev][2]

437

if l >= 0:

437

if l >= 0:

438

return l

438

return l

439

440

t = self.revision(self.node(rev))

440

t = self.revision(self.node(rev))

441

return len(t)

441

return len(t)

442

size = rawsize

442

size = rawsize

443

444

def chainbase(self, rev):

444

def chainbase(self, rev):

445

base = self._chainbasecache.get(rev)

445

base = self._chainbasecache.get(rev)

446

if base is not None:

446

if base is not None:

447

return base

447

return base

448

449

index = self.index

449

index = self.index

450

base = index[rev][3]

450

base = index[rev][3]

451

while base != rev:

451

while base != rev:

452

rev = base

452

rev = base

453

base = index[rev][3]

453

base = index[rev][3]

454

455

self._chainbasecache[rev] = base

455

self._chainbasecache[rev] = base

456

return base

456

return base

457

458

def linkrev(self, rev):

458

def linkrev(self, rev):

459

return self.index[rev][4]

459

return self.index[rev][4]

460

461

def parentrevs(self, rev):

461

def parentrevs(self, rev):

462

return self.index[rev][5:7]

462

return self.index[rev][5:7]

463

464

def node(self, rev):

464

def node(self, rev):

465

return self.index[rev][7]

465

return self.index[rev][7]

466

467

# Derived from index values.

467

# Derived from index values.

468

469

def end(self, rev):

469

def end(self, rev):

470

return self.start(rev) + self.length(rev)

470

return self.start(rev) + self.length(rev)

471

472

def parents(self, node):

472

def parents(self, node):

473

i = self.index

473

i = self.index

474

d = i[self.rev(node)]

474

d = i[self.rev(node)]

475

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

475

return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

476

477

def chainlen(self, rev):

477

def chainlen(self, rev):

478

return self._chaininfo(rev)[0]

478

return self._chaininfo(rev)[0]

479

480

def _chaininfo(self, rev):

480

def _chaininfo(self, rev):

481

chaininfocache = self._chaininfocache

481

chaininfocache = self._chaininfocache

482

if rev in chaininfocache:

482

if rev in chaininfocache:

483

return chaininfocache[rev]

483

return chaininfocache[rev]

484

index = self.index

484

index = self.index

485

generaldelta = self._generaldelta

485

generaldelta = self._generaldelta

486

iterrev = rev

486

iterrev = rev

487

e = index[iterrev]

487

e = index[iterrev]

488

clen = 0

488

clen = 0

489

compresseddeltalen = 0

489

compresseddeltalen = 0

490

while iterrev != e[3]:

490

while iterrev != e[3]:

491

clen += 1

491

clen += 1

492

compresseddeltalen += e[1]

492

compresseddeltalen += e[1]

493

if generaldelta:

493

if generaldelta:

494

iterrev = e[3]

494

iterrev = e[3]

495

else:

495

else:

496

iterrev -= 1

496

iterrev -= 1

497

if iterrev in chaininfocache:

497

if iterrev in chaininfocache:

498

t = chaininfocache[iterrev]

498

t = chaininfocache[iterrev]

499

clen += t[0]

499

clen += t[0]

500

compresseddeltalen += t[1]

500

compresseddeltalen += t[1]

501

break

501

break

502

e = index[iterrev]

502

e = index[iterrev]

503

else:

503

else:

504

# Add text length of base since decompressing that also takes

504

# Add text length of base since decompressing that also takes

505

# work. For cache hits the length is already included.

505

# work. For cache hits the length is already included.

506

compresseddeltalen += e[1]

506

compresseddeltalen += e[1]

507

r = (clen, compresseddeltalen)

507

r = (clen, compresseddeltalen)

508

chaininfocache[rev] = r

508

chaininfocache[rev] = r

509

return r

509

return r

510

511

def _deltachain(self, rev, stoprev=None):

511

def _deltachain(self, rev, stoprev=None):

512

"""Obtain the delta chain for a revision.

512

"""Obtain the delta chain for a revision.

513

514

``stoprev`` specifies a revision to stop at. If not specified, we

514

``stoprev`` specifies a revision to stop at. If not specified, we

515

stop at the base of the chain.

515

stop at the base of the chain.

516

517

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

517

Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of

518

revs in ascending order and ``stopped`` is a bool indicating whether

518

revs in ascending order and ``stopped`` is a bool indicating whether

519

``stoprev`` was hit.

519

``stoprev`` was hit.

520

"""

520

"""

521

chain = []

521

chain = []

522

523

# Alias to prevent attribute lookup in tight loop.

523

# Alias to prevent attribute lookup in tight loop.

524

index = self.index

524

index = self.index

525

generaldelta = self._generaldelta

525

generaldelta = self._generaldelta

526

527

iterrev = rev

527

iterrev = rev

528

e = index[iterrev]

528

e = index[iterrev]

529

while iterrev != e[3] and iterrev != stoprev:

529

while iterrev != e[3] and iterrev != stoprev:

530

chain.append(iterrev)

530

chain.append(iterrev)

531

if generaldelta:

531

if generaldelta:

532

iterrev = e[3]

532

iterrev = e[3]

533

else:

533

else:

534

iterrev -= 1

534

iterrev -= 1

535

e = index[iterrev]

535

e = index[iterrev]

536

537

if iterrev == stoprev:

537

if iterrev == stoprev:

538

stopped = True

538

stopped = True

539

else:

539

else:

540

chain.append(iterrev)

540

chain.append(iterrev)

541

stopped = False

541

stopped = False

542

543

chain.reverse()

543

chain.reverse()

544

return chain, stopped

544

return chain, stopped

545

546

def ancestors(self, revs, stoprev=0, inclusive=False):

546

def ancestors(self, revs, stoprev=0, inclusive=False):

547

"""Generate the ancestors of 'revs' in reverse topological order.

547

"""Generate the ancestors of 'revs' in reverse topological order.

548

Does not generate revs lower than stoprev.

548

Does not generate revs lower than stoprev.

549

550

See the documentation for ancestor.lazyancestors for more details."""

550

See the documentation for ancestor.lazyancestors for more details."""

551

552

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

552

return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,

553

inclusive=inclusive)

553

inclusive=inclusive)

554

555

def descendants(self, revs):

555

def descendants(self, revs):

556

"""Generate the descendants of 'revs' in revision order.

556

"""Generate the descendants of 'revs' in revision order.

557

558

Yield a sequence of revision numbers starting with a child of

558

Yield a sequence of revision numbers starting with a child of

559

some rev in revs, i.e., each revision is *not* considered a

559

some rev in revs, i.e., each revision is *not* considered a

560

descendant of itself. Results are ordered by revision number (a

560

descendant of itself. Results are ordered by revision number (a

561

topological sort)."""

561

topological sort)."""

562

first = min(revs)

562

first = min(revs)

563

if first == nullrev:

563

if first == nullrev:

564

for i in self:

564

for i in self:

565

yield i

565

yield i

566

return

566

return

567

568

seen = set(revs)

568

seen = set(revs)

569

for i in self.revs(start=first + 1):

569

for i in self.revs(start=first + 1):

570

for x in self.parentrevs(i):

570

for x in self.parentrevs(i):

571

if x != nullrev and x in seen:

571

if x != nullrev and x in seen:

572

seen.add(i)

572

seen.add(i)

573

yield i

573

yield i

574

break

574

break

575

576

def findcommonmissing(self, common=None, heads=None):

576

def findcommonmissing(self, common=None, heads=None):

577

"""Return a tuple of the ancestors of common and the ancestors of heads

577

"""Return a tuple of the ancestors of common and the ancestors of heads

578

that are not ancestors of common. In revset terminology, we return the

578

that are not ancestors of common. In revset terminology, we return the

579

tuple:

579

tuple:

580

581

::common, (::heads) - (::common)

581

::common, (::heads) - (::common)

582

583

The list is sorted by revision number, meaning it is

583

The list is sorted by revision number, meaning it is

584

topologically sorted.

584

topologically sorted.

585

586

'heads' and 'common' are both lists of node IDs. If heads is

586

'heads' and 'common' are both lists of node IDs. If heads is

587

not supplied, uses all of the revlog's heads. If common is not

587

not supplied, uses all of the revlog's heads. If common is not

588

supplied, uses nullid."""

588

supplied, uses nullid."""

589

if common is None:

589

if common is None:

590

common = [nullid]

590

common = [nullid]

591

if heads is None:

591

if heads is None:

592

heads = self.heads()

592

heads = self.heads()

593

594

common = [self.rev(n) for n in common]

594

common = [self.rev(n) for n in common]

595

heads = [self.rev(n) for n in heads]

595

heads = [self.rev(n) for n in heads]

596

597

# we want the ancestors, but inclusive

597

# we want the ancestors, but inclusive

598

class lazyset(object):

598

class lazyset(object):

599

def __init__(self, lazyvalues):

599

def __init__(self, lazyvalues):

600

self.addedvalues = set()

600

self.addedvalues = set()

601

self.lazyvalues = lazyvalues

601

self.lazyvalues = lazyvalues

602

603

def __contains__(self, value):

603

def __contains__(self, value):

604

return value in self.addedvalues or value in self.lazyvalues

604

return value in self.addedvalues or value in self.lazyvalues

605

606

def __iter__(self):

606

def __iter__(self):

607

added = self.addedvalues

607

added = self.addedvalues

608

for r in added:

608

for r in added:

609

yield r

609

yield r

610

for r in self.lazyvalues:

610

for r in self.lazyvalues:

611

if not r in added:

611

if not r in added:

612

yield r

612

yield r

613

614

def add(self, value):

614

def add(self, value):

615

self.addedvalues.add(value)

615

self.addedvalues.add(value)

616

617

def update(self, values):

617

def update(self, values):

618

self.addedvalues.update(values)

618

self.addedvalues.update(values)

619

620

has = lazyset(self.ancestors(common))

620

has = lazyset(self.ancestors(common))

621

has.add(nullrev)

621

has.add(nullrev)

622

has.update(common)

622

has.update(common)

623

624

# take all ancestors from heads that aren't in has

624

# take all ancestors from heads that aren't in has

625

missing = set()

625

missing = set()

626

visit = collections.deque(r for r in heads if r not in has)

626

visit = collections.deque(r for r in heads if r not in has)

627

while visit:

627

while visit:

628

r = visit.popleft()

628

r = visit.popleft()

629

if r in missing:

629

if r in missing:

630

continue

630

continue

631

else:

631

else:

632

missing.add(r)

632

missing.add(r)

633

for p in self.parentrevs(r):

633

for p in self.parentrevs(r):

634

if p not in has:

634

if p not in has:

635

visit.append(p)

635

visit.append(p)

636

missing = list(missing)

636

missing = list(missing)

637

missing.sort()

637

missing.sort()

638

return has, [self.node(miss) for miss in missing]

638

return has, [self.node(miss) for miss in missing]

639

640

def incrementalmissingrevs(self, common=None):

640

def incrementalmissingrevs(self, common=None):

641

"""Return an object that can be used to incrementally compute the

641

"""Return an object that can be used to incrementally compute the

642

revision numbers of the ancestors of arbitrary sets that are not

642

revision numbers of the ancestors of arbitrary sets that are not

643

ancestors of common. This is an ancestor.incrementalmissingancestors

643

ancestors of common. This is an ancestor.incrementalmissingancestors

644

object.

644

object.

645

646

'common' is a list of revision numbers. If common is not supplied, uses

646

'common' is a list of revision numbers. If common is not supplied, uses

647

nullrev.

647

nullrev.

648

"""

648

"""

649

if common is None:

649

if common is None:

650

common = [nullrev]

650

common = [nullrev]

651

652

return ancestor.incrementalmissingancestors(self.parentrevs, common)

652

return ancestor.incrementalmissingancestors(self.parentrevs, common)

653

654

def findmissingrevs(self, common=None, heads=None):

654

def findmissingrevs(self, common=None, heads=None):

655

"""Return the revision numbers of the ancestors of heads that

655

"""Return the revision numbers of the ancestors of heads that

656

are not ancestors of common.

656

are not ancestors of common.

657

658

More specifically, return a list of revision numbers corresponding to

658

More specifically, return a list of revision numbers corresponding to

659

nodes N such that every N satisfies the following constraints:

659

nodes N such that every N satisfies the following constraints:

660

661

1. N is an ancestor of some node in 'heads'

661

1. N is an ancestor of some node in 'heads'

662

2. N is not an ancestor of any node in 'common'

662

2. N is not an ancestor of any node in 'common'

663

664

The list is sorted by revision number, meaning it is

664

The list is sorted by revision number, meaning it is

665

topologically sorted.

665

topologically sorted.

666

667

'heads' and 'common' are both lists of revision numbers. If heads is

667

'heads' and 'common' are both lists of revision numbers. If heads is

668

not supplied, uses all of the revlog's heads. If common is not

668

not supplied, uses all of the revlog's heads. If common is not

669

supplied, uses nullid."""

669

supplied, uses nullid."""

670

if common is None:

670

if common is None:

671

common = [nullrev]

671

common = [nullrev]

672

if heads is None:

672

if heads is None:

673

heads = self.headrevs()

673

heads = self.headrevs()

674

675

inc = self.incrementalmissingrevs(common=common)

675

inc = self.incrementalmissingrevs(common=common)

676

return inc.missingancestors(heads)

676

return inc.missingancestors(heads)

677

678

def findmissing(self, common=None, heads=None):

678

def findmissing(self, common=None, heads=None):

679

"""Return the ancestors of heads that are not ancestors of common.

679

"""Return the ancestors of heads that are not ancestors of common.

680

681

More specifically, return a list of nodes N such that every N

681

More specifically, return a list of nodes N such that every N

682

satisfies the following constraints:

682

satisfies the following constraints:

683

684

1. N is an ancestor of some node in 'heads'

684

1. N is an ancestor of some node in 'heads'

685

2. N is not an ancestor of any node in 'common'

685

2. N is not an ancestor of any node in 'common'

686

687

The list is sorted by revision number, meaning it is

687

The list is sorted by revision number, meaning it is

688

topologically sorted.

688

topologically sorted.

689

690

'heads' and 'common' are both lists of node IDs. If heads is

690

'heads' and 'common' are both lists of node IDs. If heads is

691

not supplied, uses all of the revlog's heads. If common is not

691

not supplied, uses all of the revlog's heads. If common is not

692

supplied, uses nullid."""

692

supplied, uses nullid."""

693

if common is None:

693

if common is None:

694

common = [nullid]

694

common = [nullid]

695

if heads is None:

695

if heads is None:

696

heads = self.heads()

696

heads = self.heads()

697

698

common = [self.rev(n) for n in common]

698

common = [self.rev(n) for n in common]

699

heads = [self.rev(n) for n in heads]

699

heads = [self.rev(n) for n in heads]

700

701

inc = self.incrementalmissingrevs(common=common)

701

inc = self.incrementalmissingrevs(common=common)

702

return [self.node(r) for r in inc.missingancestors(heads)]

702

return [self.node(r) for r in inc.missingancestors(heads)]

703

704

def nodesbetween(self, roots=None, heads=None):

704

def nodesbetween(self, roots=None, heads=None):

705

"""Return a topological path from 'roots' to 'heads'.

705

"""Return a topological path from 'roots' to 'heads'.

706

707

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

707

Return a tuple (nodes, outroots, outheads) where 'nodes' is a

708

topologically sorted list of all nodes N that satisfy both of

708

topologically sorted list of all nodes N that satisfy both of

709

these constraints:

709

these constraints:

710

711

1. N is a descendant of some node in 'roots'

711

1. N is a descendant of some node in 'roots'

712

2. N is an ancestor of some node in 'heads'

712

2. N is an ancestor of some node in 'heads'

713

714

Every node is considered to be both a descendant and an ancestor

714

Every node is considered to be both a descendant and an ancestor

715

of itself, so every reachable node in 'roots' and 'heads' will be

715

of itself, so every reachable node in 'roots' and 'heads' will be

716

included in 'nodes'.

716

included in 'nodes'.

717

718

'outroots' is the list of reachable nodes in 'roots', i.e., the

718

'outroots' is the list of reachable nodes in 'roots', i.e., the

719

subset of 'roots' that is returned in 'nodes'. Likewise,

719

subset of 'roots' that is returned in 'nodes'. Likewise,

720

'outheads' is the subset of 'heads' that is also in 'nodes'.

720

'outheads' is the subset of 'heads' that is also in 'nodes'.

721

722

'roots' and 'heads' are both lists of node IDs. If 'roots' is

722

'roots' and 'heads' are both lists of node IDs. If 'roots' is

723

unspecified, uses nullid as the only root. If 'heads' is

723

unspecified, uses nullid as the only root. If 'heads' is

724

unspecified, uses list of all of the revlog's heads."""

724

unspecified, uses list of all of the revlog's heads."""

725

nonodes = ([], [], [])

725

nonodes = ([], [], [])

726

if roots is not None:

726

if roots is not None:

727

roots = list(roots)

727

roots = list(roots)

728

if not roots:

728

if not roots:

729

return nonodes

729

return nonodes

730

lowestrev = min([self.rev(n) for n in roots])

730

lowestrev = min([self.rev(n) for n in roots])

731

else:

731

else:

732

roots = [nullid] # Everybody's a descendant of nullid

732

roots = [nullid] # Everybody's a descendant of nullid

733

lowestrev = nullrev

733

lowestrev = nullrev

734

if (lowestrev == nullrev) and (heads is None):

734

if (lowestrev == nullrev) and (heads is None):

735

# We want _all_ the nodes!

735

# We want _all_ the nodes!

736

return ([self.node(r) for r in self], [nullid], list(self.heads()))

736

return ([self.node(r) for r in self], [nullid], list(self.heads()))

737

if heads is None:

737

if heads is None:

738

# All nodes are ancestors, so the latest ancestor is the last

738

# All nodes are ancestors, so the latest ancestor is the last

739

# node.

739

# node.

740

highestrev = len(self) - 1

740

highestrev = len(self) - 1

741

# Set ancestors to None to signal that every node is an ancestor.

741

# Set ancestors to None to signal that every node is an ancestor.

742

ancestors = None

742

ancestors = None

743

# Set heads to an empty dictionary for later discovery of heads

743

# Set heads to an empty dictionary for later discovery of heads

744

heads = {}

744

heads = {}

745

else:

745

else:

746

heads = list(heads)

746

heads = list(heads)

747

if not heads:

747

if not heads:

748

return nonodes

748

return nonodes

749

ancestors = set()

749

ancestors = set()

750

# Turn heads into a dictionary so we can remove 'fake' heads.

750

# Turn heads into a dictionary so we can remove 'fake' heads.

751

# Also, later we will be using it to filter out the heads we can't

751

# Also, later we will be using it to filter out the heads we can't

752

# find from roots.

752

# find from roots.

753

heads = dict.fromkeys(heads, False)

753

heads = dict.fromkeys(heads, False)

754

# Start at the top and keep marking parents until we're done.

754

# Start at the top and keep marking parents until we're done.

755

nodestotag = set(heads)

755

nodestotag = set(heads)

756

# Remember where the top was so we can use it as a limit later.

756

# Remember where the top was so we can use it as a limit later.

757

highestrev = max([self.rev(n) for n in nodestotag])

757

highestrev = max([self.rev(n) for n in nodestotag])

758

while nodestotag:

758

while nodestotag:

759

# grab a node to tag

759

# grab a node to tag

760

n = nodestotag.pop()

760

n = nodestotag.pop()

761

# Never tag nullid

761

# Never tag nullid

762

if n == nullid:

762

if n == nullid:

763

continue

763

continue

764

# A node's revision number represents its place in a

764

# A node's revision number represents its place in a

765

# topologically sorted list of nodes.

765

# topologically sorted list of nodes.

766

r = self.rev(n)

766

r = self.rev(n)

767

if r >= lowestrev:

767

if r >= lowestrev:

768

if n not in ancestors:

768

if n not in ancestors:

769

# If we are possibly a descendant of one of the roots

769

# If we are possibly a descendant of one of the roots

770

# and we haven't already been marked as an ancestor

770

# and we haven't already been marked as an ancestor

771

ancestors.add(n) # Mark as ancestor

771

ancestors.add(n) # Mark as ancestor

772

# Add non-nullid parents to list of nodes to tag.

772

# Add non-nullid parents to list of nodes to tag.

773

nodestotag.update([p for p in self.parents(n) if

773

nodestotag.update([p for p in self.parents(n) if

774

p != nullid])

774

p != nullid])

775

elif n in heads: # We've seen it before, is it a fake head?

775

elif n in heads: # We've seen it before, is it a fake head?

776

# So it is, real heads should not be the ancestors of

776

# So it is, real heads should not be the ancestors of

777

# any other heads.

777

# any other heads.

778

heads.pop(n)

778

heads.pop(n)

779

if not ancestors:

779

if not ancestors:

780

return nonodes

780

return nonodes

781

# Now that we have our set of ancestors, we want to remove any

781

# Now that we have our set of ancestors, we want to remove any

782

# roots that are not ancestors.

782

# roots that are not ancestors.

783

784

# If one of the roots was nullid, everything is included anyway.

784

# If one of the roots was nullid, everything is included anyway.

785

if lowestrev > nullrev:

785

if lowestrev > nullrev:

786

# But, since we weren't, let's recompute the lowest rev to not

786

# But, since we weren't, let's recompute the lowest rev to not

787

# include roots that aren't ancestors.

787

# include roots that aren't ancestors.

788

789

# Filter out roots that aren't ancestors of heads

789

# Filter out roots that aren't ancestors of heads

790

roots = [root for root in roots if root in ancestors]

790

roots = [root for root in roots if root in ancestors]

791

# Recompute the lowest revision

791

# Recompute the lowest revision

792

if roots:

792

if roots:

793

lowestrev = min([self.rev(root) for root in roots])

793

lowestrev = min([self.rev(root) for root in roots])

794

else:

794

else:

795

# No more roots? Return empty list

795

# No more roots? Return empty list

796

return nonodes

796

return nonodes

797

else:

797

else:

798

# We are descending from nullid, and don't need to care about

798

# We are descending from nullid, and don't need to care about

799

# any other roots.

799

# any other roots.

800

lowestrev = nullrev

800

lowestrev = nullrev

801

roots = [nullid]

801

roots = [nullid]

802

# Transform our roots list into a set.

802

# Transform our roots list into a set.

803

descendants = set(roots)

803

descendants = set(roots)

804

# Also, keep the original roots so we can filter out roots that aren't

804

# Also, keep the original roots so we can filter out roots that aren't

805

# 'real' roots (i.e. are descended from other roots).

805

# 'real' roots (i.e. are descended from other roots).

806

roots = descendants.copy()

806

roots = descendants.copy()

807

# Our topologically sorted list of output nodes.

807

# Our topologically sorted list of output nodes.

808

orderedout = []

808

orderedout = []

809

# Don't start at nullid since we don't want nullid in our output list,

809

# Don't start at nullid since we don't want nullid in our output list,

810

# and if nullid shows up in descendants, empty parents will look like

810

# and if nullid shows up in descendants, empty parents will look like

811

# they're descendants.

811

# they're descendants.

812

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

812

for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):

813

n = self.node(r)

813

n = self.node(r)

814

isdescendant = False

814

isdescendant = False

815

if lowestrev == nullrev: # Everybody is a descendant of nullid

815

if lowestrev == nullrev: # Everybody is a descendant of nullid

816

isdescendant = True

816

isdescendant = True

817

elif n in descendants:

817

elif n in descendants:

818

# n is already a descendant

818

# n is already a descendant

819

isdescendant = True

819

isdescendant = True

820

# This check only needs to be done here because all the roots

820

# This check only needs to be done here because all the roots

821

# will start being marked is descendants before the loop.

821

# will start being marked is descendants before the loop.

822

if n in roots:

822

if n in roots:

823

# If n was a root, check if it's a 'real' root.

823

# If n was a root, check if it's a 'real' root.

824

p = tuple(self.parents(n))

824

p = tuple(self.parents(n))

825

# If any of its parents are descendants, it's not a root.

825

# If any of its parents are descendants, it's not a root.

826

if (p[0] in descendants) or (p[1] in descendants):

826

if (p[0] in descendants) or (p[1] in descendants):

827

roots.remove(n)

827

roots.remove(n)

828

else:

828

else:

829

p = tuple(self.parents(n))

829

p = tuple(self.parents(n))

830

# A node is a descendant if either of its parents are

830

# A node is a descendant if either of its parents are

831

# descendants. (We seeded the dependents list with the roots

831

# descendants. (We seeded the dependents list with the roots

832

# up there, remember?)

832

# up there, remember?)

833

if (p[0] in descendants) or (p[1] in descendants):

833

if (p[0] in descendants) or (p[1] in descendants):

834

descendants.add(n)

834

descendants.add(n)

835

isdescendant = True

835

isdescendant = True

836

if isdescendant and ((ancestors is None) or (n in ancestors)):

836

if isdescendant and ((ancestors is None) or (n in ancestors)):

837

# Only include nodes that are both descendants and ancestors.

837

# Only include nodes that are both descendants and ancestors.

838

orderedout.append(n)

838

orderedout.append(n)

839

if (ancestors is not None) and (n in heads):

839

if (ancestors is not None) and (n in heads):

840

# We're trying to figure out which heads are reachable

840

# We're trying to figure out which heads are reachable

841

# from roots.

841

# from roots.

842

# Mark this head as having been reached

842

# Mark this head as having been reached

843

heads[n] = True

843

heads[n] = True

844

elif ancestors is None:

844

elif ancestors is None:

845

# Otherwise, we're trying to discover the heads.

845

# Otherwise, we're trying to discover the heads.

846

# Assume this is a head because if it isn't, the next step

846

# Assume this is a head because if it isn't, the next step

847

# will eventually remove it.

847

# will eventually remove it.

848

heads[n] = True

848

heads[n] = True

849

# But, obviously its parents aren't.

849

# But, obviously its parents aren't.

850

for p in self.parents(n):

850

for p in self.parents(n):

851

heads.pop(p, None)

851

heads.pop(p, None)

852

heads = [head for head, flag in heads.iteritems() if flag]

852

heads = [head for head, flag in heads.iteritems() if flag]

853

roots = list(roots)

853

roots = list(roots)

854

assert orderedout

854

assert orderedout

855

assert roots

855

assert roots

856

assert heads

856

assert heads

857

return (orderedout, roots, heads)

857

return (orderedout, roots, heads)

858

859

def headrevs(self):

859

def headrevs(self):

860

try:

860

try:

861

return self.index.headrevs()

861

return self.index.headrevs()

862

except AttributeError:

862

except AttributeError:

863

return self._headrevs()

863

return self._headrevs()

864

865

def computephases(self, roots):

865

def computephases(self, roots):

866

return self.index.computephasesmapsets(roots)

866

return self.index.computephasesmapsets(roots)

867

868

def _headrevs(self):

868

def _headrevs(self):

869

count = len(self)

869

count = len(self)

870

if not count:

870

if not count:

871

return [nullrev]

871

return [nullrev]

872

# we won't iter over filtered rev so nobody is a head at start

872

# we won't iter over filtered rev so nobody is a head at start

873

ishead = [0] * (count + 1)

873

ishead = [0] * (count + 1)

874

index = self.index

874

index = self.index

875

for r in self:

875

for r in self:

876

ishead[r] = 1 # I may be an head

876

ishead[r] = 1 # I may be an head

877

e = index[r]

877

e = index[r]

878

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

878

ishead[e[5]] = ishead[e[6]] = 0 # my parent are not

879

return [r for r, val in enumerate(ishead) if val]

879

return [r for r, val in enumerate(ishead) if val]

880

881

def heads(self, start=None, stop=None):

881

def heads(self, start=None, stop=None):

882

"""return the list of all nodes that have no children

882

"""return the list of all nodes that have no children

883

884

if start is specified, only heads that are descendants of

884

if start is specified, only heads that are descendants of

885

start will be returned

885

start will be returned

886

if stop is specified, it will consider all the revs from stop

886

if stop is specified, it will consider all the revs from stop

887

as if they had no children

887

as if they had no children

888

"""

888

"""

889

if start is None and stop is None:

889

if start is None and stop is None:

890

if not len(self):

890

if not len(self):

891

return [nullid]

891

return [nullid]

892

return [self.node(r) for r in self.headrevs()]

892

return [self.node(r) for r in self.headrevs()]

893

894

if start is None:

894

if start is None:

895

start = nullid

895

start = nullid

896

if stop is None:

896

if stop is None:

897

stop = []

897

stop = []

898

stoprevs = set([self.rev(n) for n in stop])

898

stoprevs = set([self.rev(n) for n in stop])

899

startrev = self.rev(start)

899

startrev = self.rev(start)

900

reachable = set((startrev,))

900

reachable = set((startrev,))

901

heads = set((startrev,))

901

heads = set((startrev,))

902

903

parentrevs = self.parentrevs

903

parentrevs = self.parentrevs

904

for r in self.revs(start=startrev + 1):

904

for r in self.revs(start=startrev + 1):

905

for p in parentrevs(r):

905

for p in parentrevs(r):

906

if p in reachable:

906

if p in reachable:

907

if r not in stoprevs:

907

if r not in stoprevs:

908

reachable.add(r)

908

reachable.add(r)

909

heads.add(r)

909

heads.add(r)

910

if p in heads and p not in stoprevs:

910

if p in heads and p not in stoprevs:

911

heads.remove(p)

911

heads.remove(p)

912

913

return [self.node(r) for r in heads]

913

return [self.node(r) for r in heads]

914

915

def children(self, node):

915

def children(self, node):

916

"""find the children of a given node"""

916

"""find the children of a given node"""

917

c = []

917

c = []

918

p = self.rev(node)

918

p = self.rev(node)

919

for r in self.revs(start=p + 1):

919

for r in self.revs(start=p + 1):

920

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

920

prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]

921

if prevs:

921

if prevs:

922

for pr in prevs:

922

for pr in prevs:

923

if pr == p:

923

if pr == p:

924

c.append(self.node(r))

924

c.append(self.node(r))

925

elif p == nullrev:

925

elif p == nullrev:

926

c.append(self.node(r))

926

c.append(self.node(r))

927

return c

927

return c

928

929

def descendant(self, start, end):

929

def descendant(self, start, end):

930

if start == nullrev:

930

if start == nullrev:

931

return True

931

return True

932

for i in self.descendants([start]):

932

for i in self.descendants([start]):

933

if i == end:

933

if i == end:

934

return True

934

return True

935

elif i > end:

935

elif i > end:

936

break

936

break

937

return False

937

return False

938

939

def commonancestorsheads(self, a, b):

939

def commonancestorsheads(self, a, b):

940

"""calculate all the heads of the common ancestors of nodes a and b"""

940

"""calculate all the heads of the common ancestors of nodes a and b"""

941

a, b = self.rev(a), self.rev(b)

941

a, b = self.rev(a), self.rev(b)

942

try:

942

try:

943

ancs = self.index.commonancestorsheads(a, b)

943

ancs = self.index.commonancestorsheads(a, b)

944

except (AttributeError, OverflowError): # C implementation failed

944

except (AttributeError, OverflowError): # C implementation failed

945

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

945

ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)

946

return map(self.node, ancs)

946

return map(self.node, ancs)

947

948

def isancestor(self, a, b):

948

def isancestor(self, a, b):

949

"""return True if node a is an ancestor of node b

949

"""return True if node a is an ancestor of node b

950

951

The implementation of this is trivial but the use of

951

The implementation of this is trivial but the use of

952

commonancestorsheads is not."""

952

commonancestorsheads is not."""

953

return a in self.commonancestorsheads(a, b)

953

return a in self.commonancestorsheads(a, b)

954

955

def ancestor(self, a, b):

955

def ancestor(self, a, b):

956

"""calculate the "best" common ancestor of nodes a and b"""

956

"""calculate the "best" common ancestor of nodes a and b"""

957

958

a, b = self.rev(a), self.rev(b)

958

a, b = self.rev(a), self.rev(b)

959

try:

959

try:

960

ancs = self.index.ancestors(a, b)

960

ancs = self.index.ancestors(a, b)

961

except (AttributeError, OverflowError):

961

except (AttributeError, OverflowError):

962

ancs = ancestor.ancestors(self.parentrevs, a, b)

962

ancs = ancestor.ancestors(self.parentrevs, a, b)

963

if ancs:

963

if ancs:

964

# choose a consistent winner when there's a tie

964

# choose a consistent winner when there's a tie

965

return min(map(self.node, ancs))

965

return min(map(self.node, ancs))

966

return nullid

966

return nullid

967

968

def _match(self, id):

968

def _match(self, id):

969

if isinstance(id, int):

969

if isinstance(id, int):

970

# rev

970

# rev

971

return self.node(id)

971

return self.node(id)

972

if len(id) == 20:

972

if len(id) == 20:

973

# possibly a binary node

973

# possibly a binary node

974

# odds of a binary node being all hex in ASCII are 1 in 10**25

974

# odds of a binary node being all hex in ASCII are 1 in 10**25

975

try:

975

try:

976

node = id

976

node = id

977

self.rev(node) # quick search the index

977

self.rev(node) # quick search the index

978

return node

978

return node

979

except LookupError:

979

except LookupError:

980

pass # may be partial hex id

980

pass # may be partial hex id

981

try:

981

try:

982

# str(rev)

982

# str(rev)

983

rev = int(id)

983

rev = int(id)

984

if str(rev) != id:

984

if str(rev) != id:

985

raise ValueError

985

raise ValueError

986

if rev < 0:

986

if rev < 0:

987

rev = len(self) + rev

987

rev = len(self) + rev

988

if rev < 0 or rev >= len(self):

988

if rev < 0 or rev >= len(self):

989

raise ValueError

989

raise ValueError

990

return self.node(rev)

990

return self.node(rev)

991

except (ValueError, OverflowError):

991

except (ValueError, OverflowError):

992

pass

992

pass

993

if len(id) == 40:

993

if len(id) == 40:

994

try:

994

try:

995

# a full hex nodeid?

995

# a full hex nodeid?

996

node = bin(id)

996

node = bin(id)

997

self.rev(node)

997

self.rev(node)

998

return node

998

return node

999

except (TypeError, LookupError):

999

except (TypeError, LookupError):

1000

pass

1000

pass

1001

1002

def _partialmatch(self, id):

1002

def _partialmatch(self, id):

1003

try:

1003

try:

1004

partial = self.index.partialmatch(id)

1004

partial = self.index.partialmatch(id)

1005

if partial and self.hasnode(partial):

1005

if partial and self.hasnode(partial):

1006

return partial

1006

return partial

1007

return None

1007

return None

1008

except RevlogError:

1008

except RevlogError:

1009

# parsers.c radix tree lookup gave multiple matches

1009

# parsers.c radix tree lookup gave multiple matches

1010

# fast path: for unfiltered changelog, radix tree is accurate

1010

# fast path: for unfiltered changelog, radix tree is accurate

1011

if not getattr(self, 'filteredrevs', None):

1011

if not getattr(self, 'filteredrevs', None):

1012

raise LookupError(id, self.indexfile,

1012

raise LookupError(id, self.indexfile,

1013

_('ambiguous identifier'))

1013

_('ambiguous identifier'))

1014

# fall through to slow path that filters hidden revisions

1014

# fall through to slow path that filters hidden revisions

1015

except (AttributeError, ValueError):

1015

except (AttributeError, ValueError):

1016

# we are pure python, or key was too short to search radix tree

1016

# we are pure python, or key was too short to search radix tree

1017

pass

1017

pass

1018

1019

if id in self._pcache:

1019

if id in self._pcache:

1020

return self._pcache[id]

1020

return self._pcache[id]

1021

1022

if len(id) < 40:

1022

if len(id) < 40:

1023

try:

1023

try:

1024

# hex(node)[:...]

1024

# hex(node)[:...]

1025

l = len(id) // 2 # grab an even number of digits

1025

l = len(id) // 2 # grab an even number of digits

1026

prefix = bin(id[:l * 2])

1026

prefix = bin(id[:l * 2])

1027

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1027

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1028

nl = [n for n in nl if hex(n).startswith(id) and

1028

nl = [n for n in nl if hex(n).startswith(id) and

1029

self.hasnode(n)]

1029

self.hasnode(n)]

1030

if len(nl) > 0:

1030

if len(nl) > 0:

1031

if len(nl) == 1:

1031

if len(nl) == 1:

1032

self._pcache[id] = nl[0]

1032

self._pcache[id] = nl[0]

1033

return nl[0]

1033

return nl[0]

1034

raise LookupError(id, self.indexfile,

1034

raise LookupError(id, self.indexfile,

1035

_('ambiguous identifier'))

1035

_('ambiguous identifier'))

1036

return None

1036

return None

1037

except TypeError:

1037

except TypeError:

1038

pass

1038

pass

1039

1040

def lookup(self, id):

1040

def lookup(self, id):

1041

"""locate a node based on:

1041

"""locate a node based on:

1042

- revision number or str(revision number)

1042

- revision number or str(revision number)

1043

- nodeid or subset of hex nodeid

1043

- nodeid or subset of hex nodeid

1044

"""

1044

"""

1045

n = self._match(id)

1045

n = self._match(id)

1046

if n is not None:

1046

if n is not None:

1047

return n

1047

return n

1048

n = self._partialmatch(id)

1048

n = self._partialmatch(id)

1049

if n:

1049

if n:

1050

return n

1050

return n

1051

1052

raise LookupError(id, self.indexfile, _('no match found'))

1052

raise LookupError(id, self.indexfile, _('no match found'))

1053

1054

def cmp(self, node, text):

1054

def cmp(self, node, text):

1055

"""compare text with a given file revision

1055

"""compare text with a given file revision

1056

1057

returns True if text is different than what is stored.

1057

returns True if text is different than what is stored.

1058

"""

1058

"""

1059

p1, p2 = self.parents(node)

1059

p1, p2 = self.parents(node)

1060

return hash(text, p1, p2) != node

1060

return hash(text, p1, p2) != node

1061

1062

def _addchunk(self, offset, data):

1062

def _addchunk(self, offset, data):

1063

"""Add a segment to the revlog cache.

1063

"""Add a segment to the revlog cache.

1064

1065

Accepts an absolute offset and the data that is at that location.

1065

Accepts an absolute offset and the data that is at that location.

1066

"""

1066

"""

1067

o, d = self._chunkcache

1067

o, d = self._chunkcache

1068

# try to add to existing cache

1068

# try to add to existing cache

1069

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1069

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1070

self._chunkcache = o, d + data

1070

self._chunkcache = o, d + data

1071

else:

1071

else:

1072

self._chunkcache = offset, data

1072

self._chunkcache = offset, data

1073

1074

def _loadchunk(self, offset, length, df=None):

1074

def _loadchunk(self, offset, length, df=None):

1075

"""Load a segment of raw data from the revlog.

1075

"""Load a segment of raw data from the revlog.

1076

1077

Accepts an absolute offset, length to read, and an optional existing

1077

Accepts an absolute offset, length to read, and an optional existing

1078

file handle to read from.

1078

file handle to read from.

1079

1080

If an existing file handle is passed, it will be seeked and the

1080

If an existing file handle is passed, it will be seeked and the

1081

original seek position will NOT be restored.

1081

original seek position will NOT be restored.

1082

1083

Returns a str or buffer of raw byte data.

1083

Returns a str or buffer of raw byte data.

1084

"""

1084

"""

1085

if df is not None:

1085

if df is not None:

1086

closehandle = False

1086

closehandle = False

1087

else:

1087

else:

1088

if self._inline:

1088

if self._inline:

1089

df = self.opener(self.indexfile)

1089

df = self.opener(self.indexfile)

1090

else:

1090

else:

1091

df = self.opener(self.datafile)

1091

df = self.opener(self.datafile)

1092

closehandle = True

1092

closehandle = True

1093

1094

# Cache data both forward and backward around the requested

1094

# Cache data both forward and backward around the requested

1095

# data, in a fixed size window. This helps speed up operations

1095

# data, in a fixed size window. This helps speed up operations

1096

# involving reading the revlog backwards.

1096

# involving reading the revlog backwards.

1097

cachesize = self._chunkcachesize

1097

cachesize = self._chunkcachesize

1098

realoffset = offset & ~(cachesize - 1)

1098

realoffset = offset & ~(cachesize - 1)

1099

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1099

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1100

- realoffset)

1100

- realoffset)

1101

df.seek(realoffset)

1101

df.seek(realoffset)

1102

d = df.read(reallength)

1102

d = df.read(reallength)

1103

if closehandle:

1103

if closehandle:

1104

df.close()

1104

df.close()

1105

self._addchunk(realoffset, d)

1105

self._addchunk(realoffset, d)

1106

if offset != realoffset or reallength != length:

1106

if offset != realoffset or reallength != length:

1107

return util.buffer(d, offset - realoffset, length)

1107

return util.buffer(d, offset - realoffset, length)

1108

return d

1108

return d

1109

1110

def _getchunk(self, offset, length, df=None):

1110

def _getchunk(self, offset, length, df=None):

1111

"""Obtain a segment of raw data from the revlog.

1111

"""Obtain a segment of raw data from the revlog.

1112

1113

Accepts an absolute offset, length of bytes to obtain, and an

1113

Accepts an absolute offset, length of bytes to obtain, and an

1114

optional file handle to the already-opened revlog. If the file

1114

optional file handle to the already-opened revlog. If the file

1115

handle is used, it's original seek position will not be preserved.

1115

handle is used, it's original seek position will not be preserved.

1116

1117

Requests for data may be returned from a cache.

1117

Requests for data may be returned from a cache.

1118

1119

Returns a str or a buffer instance of raw byte data.

1119

Returns a str or a buffer instance of raw byte data.

1120

"""

1120

"""

1121

o, d = self._chunkcache

1121

o, d = self._chunkcache

1122

l = len(d)

1122

l = len(d)

1123

1124

# is it in the cache?

1124

# is it in the cache?

1125

cachestart = offset - o

1125

cachestart = offset - o

1126

cacheend = cachestart + length

1126

cacheend = cachestart + length

1127

if cachestart >= 0 and cacheend <= l:

1127

if cachestart >= 0 and cacheend <= l:

1128

if cachestart == 0 and cacheend == l:

1128

if cachestart == 0 and cacheend == l:

1129

return d # avoid a copy

1129

return d # avoid a copy

1130

return util.buffer(d, cachestart, cacheend - cachestart)

1130

return util.buffer(d, cachestart, cacheend - cachestart)

1131

1132

return self._loadchunk(offset, length, df=df)

1132

return self._loadchunk(offset, length, df=df)

1133

1134

def _chunkraw(self, startrev, endrev, df=None):

1134

def _chunkraw(self, startrev, endrev, df=None):

1135

"""Obtain a segment of raw data corresponding to a range of revisions.

1135

"""Obtain a segment of raw data corresponding to a range of revisions.

1136

1137

Accepts the start and end revisions and an optional already-open

1137

Accepts the start and end revisions and an optional already-open

1138

file handle to be used for reading. If the file handle is read, its

1138

file handle to be used for reading. If the file handle is read, its

1139

seek position will not be preserved.

1139

seek position will not be preserved.

1140

1141

Requests for data may be satisfied by a cache.

1141

Requests for data may be satisfied by a cache.

1142

1143

Returns a 2-tuple of (offset, data) for the requested range of

1143

Returns a 2-tuple of (offset, data) for the requested range of

1144

revisions. Offset is the integer offset from the beginning of the

1144

revisions. Offset is the integer offset from the beginning of the

1145

revlog and data is a str or buffer of the raw byte data.

1145

revlog and data is a str or buffer of the raw byte data.

1146

1147

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1147

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1148

to determine where each revision's data begins and ends.

1148

to determine where each revision's data begins and ends.

1149

"""

1149

"""

1150

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1150

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1151

# (functions are expensive).

1151

# (functions are expensive).

1152

index = self.index

1152

index = self.index

1153

istart = index[startrev]

1153

istart = index[startrev]

1154

start = int(istart[0] >> 16)

1154

start = int(istart[0] >> 16)

1155

if startrev == endrev:

1155

if startrev == endrev:

1156

end = start + istart[1]

1156

end = start + istart[1]

1157

else:

1157

else:

1158

iend = index[endrev]

1158

iend = index[endrev]

1159

end = int(iend[0] >> 16) + iend[1]

1159

end = int(iend[0] >> 16) + iend[1]

1160

1161

if self._inline:

1161

if self._inline:

1162

start += (startrev + 1) * self._io.size

1162

start += (startrev + 1) * self._io.size

1163

end += (endrev + 1) * self._io.size

1163

end += (endrev + 1) * self._io.size

1164

length = end - start

1164

length = end - start

1165

1166

return start, self._getchunk(start, length, df=df)

1166

return start, self._getchunk(start, length, df=df)

1167

1168

def _chunk(self, rev, df=None):

1168

def _chunk(self, rev, df=None):

1169

"""Obtain a single decompressed chunk for a revision.

1169

"""Obtain a single decompressed chunk for a revision.

1170

1171

Accepts an integer revision and an optional already-open file handle

1171

Accepts an integer revision and an optional already-open file handle

1172

to be used for reading. If used, the seek position of the file will not

1172

to be used for reading. If used, the seek position of the file will not

1173

be preserved.

1173

be preserved.

1174

1175

Returns a str holding uncompressed data for the requested revision.

1175

Returns a str holding uncompressed data for the requested revision.

1176

"""

1176

"""

1177

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1177

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1178

1179

def _chunks(self, revs, df=None):

1179

def _chunks(self, revs, df=None):

1180

"""Obtain decompressed chunks for the specified revisions.

1180

"""Obtain decompressed chunks for the specified revisions.

1181

1182

Accepts an iterable of numeric revisions that are assumed to be in

1182

Accepts an iterable of numeric revisions that are assumed to be in

1183

ascending order. Also accepts an optional already-open file handle

1183

ascending order. Also accepts an optional already-open file handle

1184

to be used for reading. If used, the seek position of the file will

1184

to be used for reading. If used, the seek position of the file will

1185

not be preserved.

1185

not be preserved.

1186

1187

This function is similar to calling ``self._chunk()`` multiple times,

1187

This function is similar to calling ``self._chunk()`` multiple times,

1188

but is faster.

1188

but is faster.

1189

1190

Returns a list with decompressed data for each requested revision.

1190

Returns a list with decompressed data for each requested revision.

1191

"""

1191

"""

1192

if not revs:

1192

if not revs:

1193

return []

1193

return []

1194

start = self.start

1194

start = self.start

1195

length = self.length

1195

length = self.length

1196

inline = self._inline

1196

inline = self._inline

1197

iosize = self._io.size

1197

iosize = self._io.size

1198

buffer = util.buffer

1198

buffer = util.buffer

1199

1200

l = []

1200

l = []

1201

ladd = l.append

1201

ladd = l.append

1202

1203

try:

1203

try:

1204

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1204

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1205

except OverflowError:

1205

except OverflowError:

1206

# issue4215 - we can't cache a run of chunks greater than

1206

# issue4215 - we can't cache a run of chunks greater than

1207

# 2G on Windows

1207

# 2G on Windows

1208

return [self._chunk(rev, df=df) for rev in revs]

1208

return [self._chunk(rev, df=df) for rev in revs]

1209

1210

decomp = self.decompress

1210

decomp = self.decompress

1211

for rev in revs:

1211

for rev in revs:

1212

chunkstart = start(rev)

1212

chunkstart = start(rev)

1213

if inline:

1213

if inline:

1214

chunkstart += (rev + 1) * iosize

1214

chunkstart += (rev + 1) * iosize

1215

chunklength = length(rev)

1215

chunklength = length(rev)

1216

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1216

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1217

1218

return l

1218

return l

1219

1220

def _chunkclear(self):

1220

def _chunkclear(self):

1221

"""Clear the raw chunk cache."""

1221

"""Clear the raw chunk cache."""

1222

self._chunkcache = (0, '')

1222

self._chunkcache = (0, '')

1223

1224

def deltaparent(self, rev):

1224

def deltaparent(self, rev):

1225

"""return deltaparent of the given revision"""

1225

"""return deltaparent of the given revision"""

1226

base = self.index[rev][3]

1226

base = self.index[rev][3]

1227

if base == rev:

1227

if base == rev:

1228

return nullrev

1228

return nullrev

1229

elif self._generaldelta:

1229

elif self._generaldelta:

1230

return base

1230

return base

1231

else:

1231

else:

1232

return rev - 1

1232

return rev - 1

1233

1234

def revdiff(self, rev1, rev2):

1234

def revdiff(self, rev1, rev2):

1235

"""return or calculate a delta between two revisions"""

1235

"""return or calculate a delta between two revisions"""

1236

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1236

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1237

return str(self._chunk(rev2))

1237

return str(self._chunk(rev2))

1238

1239

return mdiff.textdiff(self.revision(rev1),

1239

return mdiff.textdiff(self.revision(rev1),

1240

self.revision(rev2))

1240

self.revision(rev2))

1241

1242

def revision(self, nodeorrev, _df=None, raw=False):

1242

def revision(self, nodeorrev, _df=None, raw=False):

1243

"""return an uncompressed revision of a given node or revision

1243

"""return an uncompressed revision of a given node or revision

1244

number.

1244

number.

1245

1246

_df - an existing file handle to read from. (internal-only)

1246

_df - an existing file handle to read from. (internal-only)

1247

raw - an optional argument specifying if the revision data is to be

1247

raw - an optional argument specifying if the revision data is to be

1248

treated as raw data when applying flag transforms. 'raw' should be set

1248

treated as raw data when applying flag transforms. 'raw' should be set

1249

to True when generating changegroups or in debug commands.

1249

to True when generating changegroups or in debug commands.

1250

"""

1250

"""

1251

if isinstance(nodeorrev, int):

1251

if isinstance(nodeorrev, int):

1252

rev = nodeorrev

1252

rev = nodeorrev

1253

node = self.node(rev)

1253

node = self.node(rev)

1254

else:

1254

else:

1255

node = nodeorrev

1255

node = nodeorrev

1256

rev = None

1256

rev = None

1257

1258

cachedrev = None

1258

cachedrev = None

1259

if node == nullid:

1259

if node == nullid:

1260

return ""

1260

return ""

1261

if self._cache:

1261

if self._cache:

1262

if self._cache[0] == node:

1262

if self._cache[0] == node:

1263

return self._cache[2]

1263

return self._cache[2]

1264

cachedrev = self._cache[1]

1264

cachedrev = self._cache[1]

1265

1266

# look up what we need to read

1266

# look up what we need to read

1267

text = None

1267

text = None

1268

if rev is None:

1268

if rev is None:

1269

rev = self.rev(node)

1269

rev = self.rev(node)

1270

1271

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1271

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1272

if stopped:

1272

if stopped:

1273

text = self._cache[2]

1273

text = self._cache[2]

1274

1275

# drop cache to save memory

1275

# drop cache to save memory

1276

self._cache = None

1276

self._cache = None

1277

1278

bins = self._chunks(chain, df=_df)

1278

bins = self._chunks(chain, df=_df)

1279

if text is None:

1279

if text is None:

1280

text = ~~str~~(bins[0])

1280

text = bytes(bins[0])

1281

bins = bins[1:]

1281

bins = bins[1:]

1282

1283

text = mdiff.patches(text, bins)

1283

text = mdiff.patches(text, bins)

1284

1285

text, validatehash = self._processflags(text, self.flags(rev), 'read',

1285

text, validatehash = self._processflags(text, self.flags(rev), 'read',

1286

raw=raw)

1286

raw=raw)

1287

if validatehash:

1287

if validatehash:

1288

self.checkhash(text, node, rev=rev)

1288

self.checkhash(text, node, rev=rev)

1289

1290

self._cache = (node, rev, text)

1290

self._cache = (node, rev, text)

1291

return text

1291

return text

1292

1293

def hash(self, text, p1, p2):

1293

def hash(self, text, p1, p2):

1294

"""Compute a node hash.

1294

"""Compute a node hash.

1295

1296

Available as a function so that subclasses can replace the hash

1296

Available as a function so that subclasses can replace the hash

1297

as needed.

1297

as needed.

1298

"""

1298

"""

1299

return hash(text, p1, p2)

1299

return hash(text, p1, p2)

1300

1301

def _processflags(self, text, flags, operation, raw=False):

1301

def _processflags(self, text, flags, operation, raw=False):

1302

"""Inspect revision data flags and applies transforms defined by

1302

"""Inspect revision data flags and applies transforms defined by

1303

registered flag processors.

1303

registered flag processors.

1304

1305

``text`` - the revision data to process

1305

``text`` - the revision data to process

1306

``flags`` - the revision flags

1306

``flags`` - the revision flags

1307

``operation`` - the operation being performed (read or write)

1307

``operation`` - the operation being performed (read or write)

1308

``raw`` - an optional argument describing if the raw transform should be

1308

``raw`` - an optional argument describing if the raw transform should be

1309

applied.

1309

applied.

1310

1311

This method processes the flags in the order (or reverse order if

1311

This method processes the flags in the order (or reverse order if

1312

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1312

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1313

flag processors registered for present flags. The order of flags defined

1313

flag processors registered for present flags. The order of flags defined

1314

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1314

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1315

1316

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1316

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1317

processed text and ``validatehash`` is a bool indicating whether the

1317

processed text and ``validatehash`` is a bool indicating whether the

1318

returned text should be checked for hash integrity.

1318

returned text should be checked for hash integrity.

1319

1320

Note: If the ``raw`` argument is set, it has precedence over the

1320

Note: If the ``raw`` argument is set, it has precedence over the

1321

operation and will only update the value of ``validatehash``.

1321

operation and will only update the value of ``validatehash``.

1322

"""

1322

"""

1323

if not operation in ('read', 'write'):

1323

if not operation in ('read', 'write'):

1324

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1324

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1325

# Check all flags are known.

1325

# Check all flags are known.

1326

if flags & ~REVIDX_KNOWN_FLAGS:

1326

if flags & ~REVIDX_KNOWN_FLAGS:

1327

raise RevlogError(_("incompatible revision flag '%#x'") %

1327

raise RevlogError(_("incompatible revision flag '%#x'") %

1328

(flags & ~REVIDX_KNOWN_FLAGS))

1328

(flags & ~REVIDX_KNOWN_FLAGS))

1329

validatehash = True

1329

validatehash = True

1330

# Depending on the operation (read or write), the order might be

1330

# Depending on the operation (read or write), the order might be

1331

# reversed due to non-commutative transforms.

1331

# reversed due to non-commutative transforms.

1332

orderedflags = REVIDX_FLAGS_ORDER

1332

orderedflags = REVIDX_FLAGS_ORDER

1333

if operation == 'write':

1333

if operation == 'write':

1334

orderedflags = reversed(orderedflags)

1334

orderedflags = reversed(orderedflags)

1335

1336

for flag in orderedflags:

1336

for flag in orderedflags:

1337

# If a flagprocessor has been registered for a known flag, apply the

1337

# If a flagprocessor has been registered for a known flag, apply the

1338

# related operation transform and update result tuple.

1338

# related operation transform and update result tuple.

1339

if flag & flags:

1339

if flag & flags:

1340

vhash = True

1340

vhash = True

1341

1342

if flag not in _flagprocessors:

1342

if flag not in _flagprocessors:

1343

message = _("missing processor for flag '%#x'") % (flag)

1343

message = _("missing processor for flag '%#x'") % (flag)

1344

raise RevlogError(message)

1344

raise RevlogError(message)

1345

1346

processor = _flagprocessors[flag]

1346

processor = _flagprocessors[flag]

1347

if processor is not None:

1347

if processor is not None:

1348

readtransform, writetransform, rawtransform = processor

1348

readtransform, writetransform, rawtransform = processor

1349

1350

if raw:

1350

if raw:

1351

vhash = rawtransform(self, text)

1351

vhash = rawtransform(self, text)

1352

elif operation == 'read':

1352

elif operation == 'read':

1353

text, vhash = readtransform(self, text)

1353

text, vhash = readtransform(self, text)

1354

else: # write operation

1354

else: # write operation

1355

text, vhash = writetransform(self, text)

1355

text, vhash = writetransform(self, text)

1356

validatehash = validatehash and vhash

1356

validatehash = validatehash and vhash

1357

1358

return text, validatehash

1358

return text, validatehash

1359

1360

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1360

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1361

"""Check node hash integrity.

1361

"""Check node hash integrity.

1362

1363

Available as a function so that subclasses can extend hash mismatch

1363

Available as a function so that subclasses can extend hash mismatch

1364

behaviors as needed.

1364

behaviors as needed.

1365

"""

1365

"""

1366

if p1 is None and p2 is None:

1366

if p1 is None and p2 is None:

1367

p1, p2 = self.parents(node)

1367

p1, p2 = self.parents(node)

1368

if node != self.hash(text, p1, p2):

1368

if node != self.hash(text, p1, p2):

1369

revornode = rev

1369

revornode = rev

1370

if revornode is None:

1370

if revornode is None:

1371

revornode = templatefilters.short(hex(node))

1371

revornode = templatefilters.short(hex(node))

1372

raise RevlogError(_("integrity check failed on %s:%s")

1372

raise RevlogError(_("integrity check failed on %s:%s")

1373

% (self.indexfile, revornode))

1373

% (self.indexfile, revornode))

1374

1375

def checkinlinesize(self, tr, fp=None):

1375

def checkinlinesize(self, tr, fp=None):

1376

"""Check if the revlog is too big for inline and convert if so.

1376

"""Check if the revlog is too big for inline and convert if so.

1377

1378

This should be called after revisions are added to the revlog. If the

1378

This should be called after revisions are added to the revlog. If the

1379

revlog has grown too large to be an inline revlog, it will convert it

1379

revlog has grown too large to be an inline revlog, it will convert it

1380

to use multiple index and data files.

1380

to use multiple index and data files.

1381

"""

1381

"""

1382

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1382

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1383

return

1383

return

1384

1385

trinfo = tr.find(self.indexfile)

1385

trinfo = tr.find(self.indexfile)

1386

if trinfo is None:

1386

if trinfo is None:

1387

raise RevlogError(_("%s not found in the transaction")

1387

raise RevlogError(_("%s not found in the transaction")

1388

% self.indexfile)

1388

% self.indexfile)

1389

1390

trindex = trinfo[2]

1390

trindex = trinfo[2]

1391

if trindex is not None:

1391

if trindex is not None:

1392

dataoff = self.start(trindex)

1392

dataoff = self.start(trindex)

1393

else:

1393

else:

1394

# revlog was stripped at start of transaction, use all leftover data

1394

# revlog was stripped at start of transaction, use all leftover data

1395

trindex = len(self) - 1

1395

trindex = len(self) - 1

1396

dataoff = self.end(-2)

1396

dataoff = self.end(-2)

1397

1398

tr.add(self.datafile, dataoff)

1398

tr.add(self.datafile, dataoff)

1399

1400

if fp:

1400

if fp:

1401

fp.flush()

1401

fp.flush()

1402

fp.close()

1402

fp.close()

1403

1404

df = self.opener(self.datafile, 'w')

1404

df = self.opener(self.datafile, 'w')

1405

try:

1405

try:

1406

for r in self:

1406

for r in self:

1407

df.write(self._chunkraw(r, r)[1])

1407

df.write(self._chunkraw(r, r)[1])

1408

finally:

1408

finally:

1409

df.close()

1409

df.close()

1410

1411

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1411

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1412

checkambig=self._checkambig)

1412

checkambig=self._checkambig)

1413

self.version &= ~(REVLOGNGINLINEDATA)

1413

self.version &= ~(REVLOGNGINLINEDATA)

1414

self._inline = False

1414

self._inline = False

1415

for i in self:

1415

for i in self:

1416

e = self._io.packentry(self.index[i], self.node, self.version, i)

1416

e = self._io.packentry(self.index[i], self.node, self.version, i)

1417

fp.write(e)

1417

fp.write(e)

1418

1419

# if we don't call close, the temp file will never replace the

1419

# if we don't call close, the temp file will never replace the

1420

# real index

1420

# real index

1421

fp.close()

1421

fp.close()

1422

1423

tr.replace(self.indexfile, trindex * self._io.size)

1423

tr.replace(self.indexfile, trindex * self._io.size)

1424

self._chunkclear()

1424

self._chunkclear()

1425

1426

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1426

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,

1427

node=None, flags=REVIDX_DEFAULT_FLAGS):

1427

node=None, flags=REVIDX_DEFAULT_FLAGS):

1428

"""add a revision to the log

1428

"""add a revision to the log

1429

1430

text - the revision data to add

1430

text - the revision data to add

1431

transaction - the transaction object used for rollback

1431

transaction - the transaction object used for rollback

1432

link - the linkrev data to add

1432

link - the linkrev data to add

1433

p1, p2 - the parent nodeids of the revision

1433

p1, p2 - the parent nodeids of the revision

1434

cachedelta - an optional precomputed delta

1434

cachedelta - an optional precomputed delta

1435

node - nodeid of revision; typically node is not specified, and it is

1435

node - nodeid of revision; typically node is not specified, and it is

1436

computed by default as hash(text, p1, p2), however subclasses might

1436

computed by default as hash(text, p1, p2), however subclasses might

1437

use different hashing method (and override checkhash() in such case)

1437

use different hashing method (and override checkhash() in such case)

1438

flags - the known flags to set on the revision

1438

flags - the known flags to set on the revision

1439

"""

1439

"""

1440

if link == nullrev:

1440

if link == nullrev:

1441

raise RevlogError(_("attempted to add linkrev -1 to %s")

1441

raise RevlogError(_("attempted to add linkrev -1 to %s")

1442

% self.indexfile)

1442

% self.indexfile)

1443

1444

if flags:

1444

if flags:

1445

node = node or self.hash(text, p1, p2)

1445

node = node or self.hash(text, p1, p2)

1446

1447

newtext, validatehash = self._processflags(text, flags, 'write')

1447

newtext, validatehash = self._processflags(text, flags, 'write')

1448

1449

# If the flag processor modifies the revision data, ignore any provided

1449

# If the flag processor modifies the revision data, ignore any provided

1450

# cachedelta.

1450

# cachedelta.

1451

if newtext != text:

1451

if newtext != text:

1452

cachedelta = None

1452

cachedelta = None

1453

text = newtext

1453

text = newtext

1454

1455

if len(text) > _maxentrysize:

1455

if len(text) > _maxentrysize:

1456

raise RevlogError(

1456

raise RevlogError(

1457

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1457

_("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")

1458

% (self.indexfile, len(text)))

1458

% (self.indexfile, len(text)))

1459

1460

node = node or self.hash(text, p1, p2)

1460

node = node or self.hash(text, p1, p2)

1461

if node in self.nodemap:

1461

if node in self.nodemap:

1462

return node

1462

return node

1463

1464

if validatehash:

1464

if validatehash:

1465

self.checkhash(text, node, p1=p1, p2=p2)

1465

self.checkhash(text, node, p1=p1, p2=p2)

1466

1467

dfh = None

1467

dfh = None

1468

if not self._inline:

1468

if not self._inline:

1469

dfh = self.opener(self.datafile, "a+")

1469

dfh = self.opener(self.datafile, "a+")

1470

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1470

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1471

try:

1471

try:

1472

return self._addrevision(node, text, transaction, link, p1, p2,

1472

return self._addrevision(node, text, transaction, link, p1, p2,

1473

flags, cachedelta, ifh, dfh)

1473

flags, cachedelta, ifh, dfh)

1474

finally:

1474

finally:

1475

if dfh:

1475

if dfh:

1476

dfh.close()

1476

dfh.close()

1477

ifh.close()

1477

ifh.close()

1478

1479

def compress(self, data):

1479

def compress(self, data):

1480

"""Generate a possibly-compressed representation of data."""

1480

"""Generate a possibly-compressed representation of data."""

1481

if not data:

1481

if not data:

1482

return '', data

1482

return '', data

1483

1484

compressed = self._compressor.compress(data)

1484

compressed = self._compressor.compress(data)

1485

1486

if compressed:

1486

if compressed:

1487

# The revlog compressor added the header in the returned data.

1487

# The revlog compressor added the header in the returned data.

1488

return '', compressed

1488

return '', compressed

1489

1490

if data[0] == '\0':

1490

if data[0] == '\0':

1491

return '', data

1491

return '', data

1492

return 'u', data

1492

return 'u', data

1493

1494

def decompress(self, data):

1494

def decompress(self, data):

1495

"""Decompress a revlog chunk.

1495

"""Decompress a revlog chunk.

1496

1497

The chunk is expected to begin with a header identifying the

1497

The chunk is expected to begin with a header identifying the

1498

format type so it can be routed to an appropriate decompressor.

1498

format type so it can be routed to an appropriate decompressor.

1499

"""

1499

"""

1500

if not data:

1500

if not data:

1501

return data

1501

return data

1502

1503

# Revlogs are read much more frequently than they are written and many

1503

# Revlogs are read much more frequently than they are written and many

1504

# chunks only take microseconds to decompress, so performance is

1504

# chunks only take microseconds to decompress, so performance is

1505

# important here.

1505

# important here.

1506

#

1506

#

1507

# We can make a few assumptions about revlogs:

1507

# We can make a few assumptions about revlogs:

1508

#

1508

#

1509

# 1) the majority of chunks will be compressed (as opposed to inline

1509

# 1) the majority of chunks will be compressed (as opposed to inline

1510

# raw data).

1510

# raw data).

1511

# 2) decompressing *any* data will likely by at least 10x slower than

1511

# 2) decompressing *any* data will likely by at least 10x slower than

1512

# returning raw inline data.

1512

# returning raw inline data.

1513

# 3) we want to prioritize common and officially supported compression

1513

# 3) we want to prioritize common and officially supported compression

1514

# engines

1514

# engines

1515

#

1515

#

1516

# It follows that we want to optimize for "decompress compressed data

1516

# It follows that we want to optimize for "decompress compressed data

1517

# when encoded with common and officially supported compression engines"

1517

# when encoded with common and officially supported compression engines"

1518

# case over "raw data" and "data encoded by less common or non-official

1518

# case over "raw data" and "data encoded by less common or non-official

1519

# compression engines." That is why we have the inline lookup first

1519

# compression engines." That is why we have the inline lookup first

1520

# followed by the compengines lookup.

1520

# followed by the compengines lookup.

1521

#

1521

#

1522

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1522

# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib

1523

# compressed chunks. And this matters for changelog and manifest reads.

1523

# compressed chunks. And this matters for changelog and manifest reads.

1524

t = data[0:1]

1524

t = data[0:1]

1525

1526

if t == 'x':

1526

if t == 'x':

1527

try:

1527

try:

1528

return _zlibdecompress(data)

1528

return _zlibdecompress(data)

1529

except zlib.error as e:

1529

except zlib.error as e:

1530

raise RevlogError(_('revlog decompress error: %s') % str(e))

1530

raise RevlogError(_('revlog decompress error: %s') % str(e))

1531

# '\0' is more common than 'u' so it goes first.

1531

# '\0' is more common than 'u' so it goes first.

1532

elif t == '\0':

1532

elif t == '\0':

1533

return data

1533

return data

1534

elif t == 'u':

1534

elif t == 'u':

1535

return util.buffer(data, 1)

1535

return util.buffer(data, 1)

1536

1537

try:

1537

try:

1538

compressor = self._decompressors[t]

1538

compressor = self._decompressors[t]

1539

except KeyError:

1539

except KeyError:

1540

try:

1540

try:

1541

engine = util.compengines.forrevlogheader(t)

1541

engine = util.compengines.forrevlogheader(t)

1542

compressor = engine.revlogcompressor()

1542

compressor = engine.revlogcompressor()

1543

self._decompressors[t] = compressor

1543

self._decompressors[t] = compressor

1544

except KeyError:

1544

except KeyError:

1545

raise RevlogError(_('unknown compression type %r') % t)

1545

raise RevlogError(_('unknown compression type %r') % t)

1546

1547

return compressor.decompress(data)

1547

return compressor.decompress(data)

1548

1549

def _isgooddelta(self, d, textlen):

1549

def _isgooddelta(self, d, textlen):

1550

"""Returns True if the given delta is good. Good means that it is within

1550

"""Returns True if the given delta is good. Good means that it is within

1551

the disk span, disk size, and chain length bounds that we know to be

1551

the disk span, disk size, and chain length bounds that we know to be

1552

performant."""

1552

performant."""

1553

if d is None:

1553

if d is None:

1554

return False

1554

return False

1555

1556

# - 'dist' is the distance from the base revision -- bounding it limits

1556

# - 'dist' is the distance from the base revision -- bounding it limits

1557

# the amount of I/O we need to do.

1557

# the amount of I/O we need to do.

1558

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1558

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1559

# to apply -- bounding it limits the amount of CPU we consume.

1559

# to apply -- bounding it limits the amount of CPU we consume.

1560

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1560

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1561

if (dist > textlen * 4 or l > textlen or

1561

if (dist > textlen * 4 or l > textlen or

1562

compresseddeltalen > textlen * 2 or

1562

compresseddeltalen > textlen * 2 or

1563

(self._maxchainlen and chainlen > self._maxchainlen)):

1563

(self._maxchainlen and chainlen > self._maxchainlen)):

1564

return False

1564

return False

1565

1566

return True

1566

return True

1567

1568

def _addrevision(self, node, text, transaction, link, p1, p2, flags,

1568

def _addrevision(self, node, text, transaction, link, p1, p2, flags,

1569

cachedelta, ifh, dfh, alwayscache=False, raw=False):

1569

cachedelta, ifh, dfh, alwayscache=False, raw=False):

1570

"""internal function to add revisions to the log

1570

"""internal function to add revisions to the log

1571

1572

see addrevision for argument descriptions.

1572

see addrevision for argument descriptions.

1573

invariants:

1573

invariants:

1574

- text is optional (can be None); if not set, cachedelta must be set.

1574

- text is optional (can be None); if not set, cachedelta must be set.

1575

if both are set, they must correspond to each other.

1575

if both are set, they must correspond to each other.

1576

- raw is optional; if set to True, it indicates the revision data is to

1576

- raw is optional; if set to True, it indicates the revision data is to

1577

be treated by _processflags() as raw. It is usually set by changegroup

1577

be treated by _processflags() as raw. It is usually set by changegroup

1578

generation and debug commands.

1578

generation and debug commands.

1579

"""

1579

"""

1580

btext = [text]

1580

btext = [text]

1581

def buildtext():

1581

def buildtext():

1582

if btext[0] is not None:

1582

if btext[0] is not None:

1583

return btext[0]

1583

return btext[0]

1584

baserev = cachedelta[0]

1584

baserev = cachedelta[0]

1585

delta = cachedelta[1]

1585

delta = cachedelta[1]

1586

# special case deltas which replace entire base; no need to decode

1586

# special case deltas which replace entire base; no need to decode

1587

# base revision. this neatly avoids censored bases, which throw when

1587

# base revision. this neatly avoids censored bases, which throw when

1588

# they're decoded.

1588

# they're decoded.

1589

hlen = struct.calcsize(">lll")

1589

hlen = struct.calcsize(">lll")

1590

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1590

if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),

1591

len(delta) - hlen):

1591

len(delta) - hlen):

1592

btext[0] = delta[hlen:]

1592

btext[0] = delta[hlen:]

1593

else:

1593

else:

1594

if self._inline:

1594

if self._inline:

1595

fh = ifh

1595

fh = ifh

1596

else:

1596

else:

1597

fh = dfh

1597

fh = dfh

1598

basetext = self.revision(self.node(baserev), _df=fh, raw=raw)

1598

basetext = self.revision(self.node(baserev), _df=fh, raw=raw)

1599

btext[0] = mdiff.patch(basetext, delta)

1599

btext[0] = mdiff.patch(basetext, delta)

1600

1601

try:

1601

try:

1602

res = self._processflags(btext[0], flags, 'read', raw=raw)

1602

res = self._processflags(btext[0], flags, 'read', raw=raw)

1603

btext[0], validatehash = res

1603

btext[0], validatehash = res

1604

if validatehash:

1604

if validatehash:

1605

self.checkhash(btext[0], node, p1=p1, p2=p2)

1605

self.checkhash(btext[0], node, p1=p1, p2=p2)

1606

if flags & REVIDX_ISCENSORED:

1606

if flags & REVIDX_ISCENSORED:

1607

raise RevlogError(_('node %s is not censored') % node)

1607

raise RevlogError(_('node %s is not censored') % node)

1608

except CensoredNodeError:

1608

except CensoredNodeError:

1609

# must pass the censored index flag to add censored revisions

1609

# must pass the censored index flag to add censored revisions

1610

if not flags & REVIDX_ISCENSORED:

1610

if not flags & REVIDX_ISCENSORED:

1611

raise

1611

raise

1612

return btext[0]

1612

return btext[0]

1613

1614

def builddelta(rev):

1614

def builddelta(rev):

1615

# can we use the cached delta?

1615

# can we use the cached delta?

1616

if cachedelta and cachedelta[0] == rev:

1616

if cachedelta and cachedelta[0] == rev:

1617

delta = cachedelta[1]

1617

delta = cachedelta[1]

1618

else:

1618

else:

1619

t = buildtext()

1619

t = buildtext()

1620

if self.iscensored(rev):

1620

if self.iscensored(rev):

1621

# deltas based on a censored revision must replace the

1621

# deltas based on a censored revision must replace the

1622

# full content in one patch, so delta works everywhere

1622

# full content in one patch, so delta works everywhere

1623

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1623

header = mdiff.replacediffheader(self.rawsize(rev), len(t))

1624

delta = header + t

1624

delta = header + t

1625

else:

1625

else:

1626

if self._inline:

1626

if self._inline:

1627

fh = ifh

1627

fh = ifh

1628

else:

1628

else:

1629

fh = dfh

1629

fh = dfh

1630

ptext = self.revision(self.node(rev), _df=fh)

1630

ptext = self.revision(self.node(rev), _df=fh)

1631

delta = mdiff.textdiff(ptext, t)

1631

delta = mdiff.textdiff(ptext, t)

1632

header, data = self.compress(delta)

1632

header, data = self.compress(delta)

1633

deltalen = len(header) + len(data)

1633

deltalen = len(header) + len(data)

1634

chainbase = self.chainbase(rev)

1634

chainbase = self.chainbase(rev)

1635

dist = deltalen + offset - self.start(chainbase)

1635

dist = deltalen + offset - self.start(chainbase)

1636

if self._generaldelta:

1636

if self._generaldelta:

1637

base = rev

1637

base = rev

1638

else:

1638

else:

1639

base = chainbase

1639

base = chainbase

1640

chainlen, compresseddeltalen = self._chaininfo(rev)

1640

chainlen, compresseddeltalen = self._chaininfo(rev)

1641

chainlen += 1

1641

chainlen += 1

1642

compresseddeltalen += deltalen

1642

compresseddeltalen += deltalen

1643

return (dist, deltalen, (header, data), base,

1643

return (dist, deltalen, (header, data), base,

1644

chainbase, chainlen, compresseddeltalen)

1644

chainbase, chainlen, compresseddeltalen)

1645

1646

curr = len(self)

1646

curr = len(self)

1647

prev = curr - 1

1647

prev = curr - 1

1648

offset = self.end(prev)

1648

offset = self.end(prev)

1649

delta = None

1649

delta = None

1650

p1r, p2r = self.rev(p1), self.rev(p2)

1650

p1r, p2r = self.rev(p1), self.rev(p2)

1651

1652

# full versions are inserted when the needed deltas

1652

# full versions are inserted when the needed deltas

1653

# become comparable to the uncompressed text

1653

# become comparable to the uncompressed text

1654

if text is None:

1654

if text is None:

1655

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1655

textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),

1656

cachedelta[1])

1656

cachedelta[1])

1657

else:

1657

else:

1658

textlen = len(text)

1658

textlen = len(text)

1659

1660

# should we try to build a delta?

1660

# should we try to build a delta?

1661

if prev != nullrev and self.storedeltachains:

1661

if prev != nullrev and self.storedeltachains:

1662

tested = set()

1662

tested = set()

1663

# This condition is true most of the time when processing

1663

# This condition is true most of the time when processing

1664

# changegroup data into a generaldelta repo. The only time it

1664

# changegroup data into a generaldelta repo. The only time it

1665

# isn't true is if this is the first revision in a delta chain

1665

# isn't true is if this is the first revision in a delta chain

1666

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1666

# or if ``format.generaldelta=true`` disabled ``lazydeltabase``.

1667

if cachedelta and self._generaldelta and self._lazydeltabase:

1667

if cachedelta and self._generaldelta and self._lazydeltabase:

1668

# Assume what we received from the server is a good choice

1668

# Assume what we received from the server is a good choice

1669

# build delta will reuse the cache

1669

# build delta will reuse the cache

1670

candidatedelta = builddelta(cachedelta[0])

1670

candidatedelta = builddelta(cachedelta[0])

1671

tested.add(cachedelta[0])

1671

tested.add(cachedelta[0])

1672

if self._isgooddelta(candidatedelta, textlen):

1672

if self._isgooddelta(candidatedelta, textlen):

1673

delta = candidatedelta

1673

delta = candidatedelta

1674

if delta is None and self._generaldelta:

1674

if delta is None and self._generaldelta:

1675

# exclude already lazy tested base if any

1675

# exclude already lazy tested base if any

1676

parents = [p for p in (p1r, p2r)

1676

parents = [p for p in (p1r, p2r)

1677

if p != nullrev and p not in tested]

1677

if p != nullrev and p not in tested]

1678

if parents and not self._aggressivemergedeltas:

1678

if parents and not self._aggressivemergedeltas:

1679

# Pick whichever parent is closer to us (to minimize the

1679

# Pick whichever parent is closer to us (to minimize the

1680

# chance of having to build a fulltext).

1680

# chance of having to build a fulltext).

1681

parents = [max(parents)]

1681

parents = [max(parents)]

1682

tested.update(parents)

1682

tested.update(parents)

1683

pdeltas = []

1683

pdeltas = []

1684

for p in parents:

1684

for p in parents:

1685

pd = builddelta(p)

1685

pd = builddelta(p)

1686

if self._isgooddelta(pd, textlen):

1686

if self._isgooddelta(pd, textlen):

1687

pdeltas.append(pd)

1687

pdeltas.append(pd)

1688

if pdeltas:

1688

if pdeltas:

1689

delta = min(pdeltas, key=lambda x: x[1])

1689

delta = min(pdeltas, key=lambda x: x[1])

1690

if delta is None and prev not in tested:

1690

if delta is None and prev not in tested:

1691

# other approach failed try against prev to hopefully save us a

1691

# other approach failed try against prev to hopefully save us a

1692

# fulltext.

1692

# fulltext.

1693

candidatedelta = builddelta(prev)

1693

candidatedelta = builddelta(prev)

1694

if self._isgooddelta(candidatedelta, textlen):

1694

if self._isgooddelta(candidatedelta, textlen):

1695

delta = candidatedelta

1695

delta = candidatedelta

1696

if delta is not None:

1696

if delta is not None:

1697

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1697

dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta

1698

else:

1698

else:

1699

text = buildtext()

1699

text = buildtext()

1700

data = self.compress(text)

1700

data = self.compress(text)

1701

l = len(data[1]) + len(data[0])

1701

l = len(data[1]) + len(data[0])

1702

base = chainbase = curr

1702

base = chainbase = curr

1703

1704

e = (offset_type(offset, flags), l, textlen,

1704

e = (offset_type(offset, flags), l, textlen,

1705

base, link, p1r, p2r, node)

1705

base, link, p1r, p2r, node)

1706

self.index.insert(-1, e)

1706

self.index.insert(-1, e)

1707

self.nodemap[node] = curr

1707

self.nodemap[node] = curr

1708

1709

entry = self._io.packentry(e, self.node, self.version, curr)

1709

entry = self._io.packentry(e, self.node, self.version, curr)

1710

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1710

self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

1711

1712

if alwayscache and text is None:

1712

if alwayscache and text is None:

1713

text = buildtext()

1713

text = buildtext()

1714

1715

if type(text) == str: # only accept immutable objects

1715

if type(text) == str: # only accept immutable objects

1716

self._cache = (node, curr, text)

1716

self._cache = (node, curr, text)

1717

self._chainbasecache[curr] = chainbase

1717

self._chainbasecache[curr] = chainbase

1718

return node

1718

return node

1719

1720

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1720

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1721

# Files opened in a+ mode have inconsistent behavior on various

1721

# Files opened in a+ mode have inconsistent behavior on various

1722

# platforms. Windows requires that a file positioning call be made

1722

# platforms. Windows requires that a file positioning call be made

1723

# when the file handle transitions between reads and writes. See

1723

# when the file handle transitions between reads and writes. See

1724

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1724

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1725

# platforms, Python or the platform itself can be buggy. Some versions

1725

# platforms, Python or the platform itself can be buggy. Some versions

1726

# of Solaris have been observed to not append at the end of the file

1726

# of Solaris have been observed to not append at the end of the file

1727

# if the file was seeked to before the end. See issue4943 for more.

1727

# if the file was seeked to before the end. See issue4943 for more.

1728

#

1728

#

1729

# We work around this issue by inserting a seek() before writing.

1729

# We work around this issue by inserting a seek() before writing.

1730

# Note: This is likely not necessary on Python 3.

1730

# Note: This is likely not necessary on Python 3.

1731

ifh.seek(0, os.SEEK_END)

1731

ifh.seek(0, os.SEEK_END)

1732

if dfh:

1732

if dfh:

1733

dfh.seek(0, os.SEEK_END)

1733

dfh.seek(0, os.SEEK_END)

1734

1735

curr = len(self) - 1

1735

curr = len(self) - 1

1736

if not self._inline:

1736

if not self._inline:

1737

transaction.add(self.datafile, offset)

1737

transaction.add(self.datafile, offset)

1738

transaction.add(self.indexfile, curr * len(entry))

1738

transaction.add(self.indexfile, curr * len(entry))

1739

if data[0]:

1739

if data[0]:

1740

dfh.write(data[0])

1740

dfh.write(data[0])

1741

dfh.write(data[1])

1741

dfh.write(data[1])

1742

ifh.write(entry)

1742

ifh.write(entry)

1743

else:

1743

else:

1744

offset += curr * self._io.size

1744

offset += curr * self._io.size

1745

transaction.add(self.indexfile, offset, curr)

1745

transaction.add(self.indexfile, offset, curr)

1746

ifh.write(entry)

1746

ifh.write(entry)

1747

ifh.write(data[0])

1747

ifh.write(data[0])

1748

ifh.write(data[1])

1748

ifh.write(data[1])

1749

self.checkinlinesize(transaction, ifh)

1749

self.checkinlinesize(transaction, ifh)

1750

1751

def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):

1751

def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):

1752

"""

1752

"""

1753

add a delta group

1753

add a delta group

1754

1755

given a set of deltas, add them to the revision log. the

1755

given a set of deltas, add them to the revision log. the

1756

first delta is against its parent, which should be in our

1756

first delta is against its parent, which should be in our

1757

log, the rest are against the previous delta.

1757

log, the rest are against the previous delta.

1758

1759

If ``addrevisioncb`` is defined, it will be called with arguments of

1759

If ``addrevisioncb`` is defined, it will be called with arguments of

1760

this revlog and the node that was added.

1760

this revlog and the node that was added.

1761

"""

1761

"""

1762

1763

# track the base of the current delta log

1763

# track the base of the current delta log

1764

content = []

1764

content = []

1765

node = None

1765

node = None

1766

1767

r = len(self)

1767

r = len(self)

1768

end = 0

1768

end = 0

1769

if r:

1769

if r:

1770

end = self.end(r - 1)

1770

end = self.end(r - 1)

1771

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1771

ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)

1772

isize = r * self._io.size

1772

isize = r * self._io.size

1773

if self._inline:

1773

if self._inline:

1774

transaction.add(self.indexfile, end + isize, r)

1774

transaction.add(self.indexfile, end + isize, r)

1775

dfh = None

1775

dfh = None

1776

else:

1776

else:

1777

transaction.add(self.indexfile, isize, r)

1777

transaction.add(self.indexfile, isize, r)

1778

transaction.add(self.datafile, end)

1778

transaction.add(self.datafile, end)

1779

dfh = self.opener(self.datafile, "a+")

1779

dfh = self.opener(self.datafile, "a+")

1780

def flush():

1780

def flush():

1781

if dfh:

1781

if dfh:

1782

dfh.flush()

1782

dfh.flush()

1783

ifh.flush()

1783

ifh.flush()

1784

try:

1784

try:

1785

# loop through our set of deltas

1785

# loop through our set of deltas

1786

chain = None

1786

chain = None

1787

for chunkdata in iter(lambda: cg.deltachunk(chain), {}):

1787

for chunkdata in iter(lambda: cg.deltachunk(chain), {}):

1788

node = chunkdata['node']

1788

node = chunkdata['node']

1789

p1 = chunkdata['p1']

1789

p1 = chunkdata['p1']

1790

p2 = chunkdata['p2']

1790

p2 = chunkdata['p2']

1791

cs = chunkdata['cs']

1791

cs = chunkdata['cs']

1792

deltabase = chunkdata['deltabase']

1792

deltabase = chunkdata['deltabase']

1793

delta = chunkdata['delta']

1793

delta = chunkdata['delta']

1794

flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

1794

flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

1795

1796

content.append(node)

1796

content.append(node)

1797

1798

link = linkmapper(cs)

1798

link = linkmapper(cs)

1799

if node in self.nodemap:

1799

if node in self.nodemap:

1800

# this can happen if two branches make the same change

1800

# this can happen if two branches make the same change

1801

chain = node

1801

chain = node

1802

continue

1802

continue

1803

1804

for p in (p1, p2):

1804

for p in (p1, p2):

1805

if p not in self.nodemap:

1805

if p not in self.nodemap:

1806

raise LookupError(p, self.indexfile,

1806

raise LookupError(p, self.indexfile,

1807

_('unknown parent'))

1807

_('unknown parent'))

1808

1809

if deltabase not in self.nodemap:

1809

if deltabase not in self.nodemap:

1810

raise LookupError(deltabase, self.indexfile,

1810

raise LookupError(deltabase, self.indexfile,

1811

_('unknown delta base'))

1811

_('unknown delta base'))

1812

1813

baserev = self.rev(deltabase)

1813

baserev = self.rev(deltabase)

1814

1815

if baserev != nullrev and self.iscensored(baserev):

1815

if baserev != nullrev and self.iscensored(baserev):

1816

# if base is censored, delta must be full replacement in a

1816

# if base is censored, delta must be full replacement in a

1817

# single patch operation

1817

# single patch operation

1818

hlen = struct.calcsize(">lll")

1818

hlen = struct.calcsize(">lll")

1819

oldlen = self.rawsize(baserev)

1819

oldlen = self.rawsize(baserev)

1820

newlen = len(delta) - hlen

1820

newlen = len(delta) - hlen

1821

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

1821

if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):

1822

raise error.CensoredBaseError(self.indexfile,

1822

raise error.CensoredBaseError(self.indexfile,

1823

self.node(baserev))

1823

self.node(baserev))

1824

1825

if not flags and self._peek_iscensored(baserev, delta, flush):

1825

if not flags and self._peek_iscensored(baserev, delta, flush):

1826

flags |= REVIDX_ISCENSORED

1826

flags |= REVIDX_ISCENSORED

1827

1828

# We assume consumers of addrevisioncb will want to retrieve

1828

# We assume consumers of addrevisioncb will want to retrieve

1829

# the added revision, which will require a call to

1829

# the added revision, which will require a call to

1830

# revision(). revision() will fast path if there is a cache

1830

# revision(). revision() will fast path if there is a cache

1831

# hit. So, we tell _addrevision() to always cache in this case.

1831

# hit. So, we tell _addrevision() to always cache in this case.

1832

# We're only using addgroup() in the context of changegroup

1832

# We're only using addgroup() in the context of changegroup

1833

# generation so the revision data can always be handled as raw

1833

# generation so the revision data can always be handled as raw

1834

# by the flagprocessor.

1834

# by the flagprocessor.

1835

chain = self._addrevision(node, None, transaction, link,

1835

chain = self._addrevision(node, None, transaction, link,

1836

p1, p2, flags, (baserev, delta),

1836

p1, p2, flags, (baserev, delta),

1837

ifh, dfh,

1837

ifh, dfh,

1838

alwayscache=bool(addrevisioncb),

1838

alwayscache=bool(addrevisioncb),

1839

raw=True)

1839

raw=True)

1840

1841

if addrevisioncb:

1841

if addrevisioncb:

1842

addrevisioncb(self, chain)

1842

addrevisioncb(self, chain)

1843

1844

if not dfh and not self._inline:

1844

if not dfh and not self._inline:

1845

# addrevision switched from inline to conventional

1845

# addrevision switched from inline to conventional

1846

# reopen the index

1846

# reopen the index

1847

ifh.close()

1847

ifh.close()

1848

dfh = self.opener(self.datafile, "a+")

1848

dfh = self.opener(self.datafile, "a+")

1849

ifh = self.opener(self.indexfile, "a+",

1849

ifh = self.opener(self.indexfile, "a+",

1850

checkambig=self._checkambig)

1850

checkambig=self._checkambig)

1851

finally:

1851

finally:

1852

if dfh:

1852

if dfh:

1853

dfh.close()

1853

dfh.close()

1854

ifh.close()

1854

ifh.close()

1855

1856

return content

1856

return content

1857

1858

def iscensored(self, rev):

1858

def iscensored(self, rev):

1859

"""Check if a file revision is censored."""

1859

"""Check if a file revision is censored."""

1860

return False

1860

return False

1861

1862

def _peek_iscensored(self, baserev, delta, flush):

1862

def _peek_iscensored(self, baserev, delta, flush):

1863

"""Quickly check if a delta produces a censored revision."""

1863

"""Quickly check if a delta produces a censored revision."""

1864

return False

1864

return False

1865

1866

def getstrippoint(self, minlink):

1866

def getstrippoint(self, minlink):

1867

"""find the minimum rev that must be stripped to strip the linkrev

1867

"""find the minimum rev that must be stripped to strip the linkrev

1868

1869

Returns a tuple containing the minimum rev and a set of all revs that

1869

Returns a tuple containing the minimum rev and a set of all revs that

1870

have linkrevs that will be broken by this strip.

1870

have linkrevs that will be broken by this strip.

1871

"""

1871

"""

1872

brokenrevs = set()

1872

brokenrevs = set()

1873

strippoint = len(self)

1873

strippoint = len(self)

1874

1875

heads = {}

1875

heads = {}

1876

futurelargelinkrevs = set()

1876

futurelargelinkrevs = set()

1877

for head in self.headrevs():

1877

for head in self.headrevs():

1878

headlinkrev = self.linkrev(head)

1878

headlinkrev = self.linkrev(head)

1879

heads[head] = headlinkrev

1879

heads[head] = headlinkrev

1880

if headlinkrev >= minlink:

1880

if headlinkrev >= minlink:

1881

futurelargelinkrevs.add(headlinkrev)

1881

futurelargelinkrevs.add(headlinkrev)

1882

1883

# This algorithm involves walking down the rev graph, starting at the

1883

# This algorithm involves walking down the rev graph, starting at the

1884

# heads. Since the revs are topologically sorted according to linkrev,

1884

# heads. Since the revs are topologically sorted according to linkrev,

1885

# once all head linkrevs are below the minlink, we know there are

1885

# once all head linkrevs are below the minlink, we know there are

1886

# no more revs that could have a linkrev greater than minlink.

1886

# no more revs that could have a linkrev greater than minlink.

1887

# So we can stop walking.

1887

# So we can stop walking.

1888

while futurelargelinkrevs:

1888

while futurelargelinkrevs:

1889

strippoint -= 1

1889

strippoint -= 1

1890

linkrev = heads.pop(strippoint)

1890

linkrev = heads.pop(strippoint)

1891

1892

if linkrev < minlink:

1892

if linkrev < minlink:

1893

brokenrevs.add(strippoint)

1893

brokenrevs.add(strippoint)

1894

else:

1894

else:

1895

futurelargelinkrevs.remove(linkrev)

1895

futurelargelinkrevs.remove(linkrev)

1896

1897

for p in self.parentrevs(strippoint):

1897

for p in self.parentrevs(strippoint):

1898

if p != nullrev:

1898

if p != nullrev:

1899

plinkrev = self.linkrev(p)

1899

plinkrev = self.linkrev(p)

1900

heads[p] = plinkrev

1900

heads[p] = plinkrev

1901

if plinkrev >= minlink:

1901

if plinkrev >= minlink:

1902

futurelargelinkrevs.add(plinkrev)

1902

futurelargelinkrevs.add(plinkrev)

1903

1904

return strippoint, brokenrevs

1904

return strippoint, brokenrevs

1905

1906

def strip(self, minlink, transaction):

1906

def strip(self, minlink, transaction):

1907

"""truncate the revlog on the first revision with a linkrev >= minlink

1907

"""truncate the revlog on the first revision with a linkrev >= minlink

1908

1909

This function is called when we're stripping revision minlink and

1909

This function is called when we're stripping revision minlink and

1910

its descendants from the repository.

1910

its descendants from the repository.

1911

1912

We have to remove all revisions with linkrev >= minlink, because

1912

We have to remove all revisions with linkrev >= minlink, because

1913

the equivalent changelog revisions will be renumbered after the

1913

the equivalent changelog revisions will be renumbered after the

1914

strip.

1914

strip.

1915

1916

So we truncate the revlog on the first of these revisions, and

1916

So we truncate the revlog on the first of these revisions, and

1917

trust that the caller has saved the revisions that shouldn't be

1917

trust that the caller has saved the revisions that shouldn't be

1918

removed and that it'll re-add them after this truncation.

1918

removed and that it'll re-add them after this truncation.

1919

"""

1919

"""

1920

if len(self) == 0:

1920

if len(self) == 0:

1921

return

1921

return

1922

1923

rev, _ = self.getstrippoint(minlink)

1923

rev, _ = self.getstrippoint(minlink)

1924

if rev == len(self):

1924

if rev == len(self):

1925

return

1925

return

1926

1927

# first truncate the files on disk

1927

# first truncate the files on disk

1928

end = self.start(rev)

1928

end = self.start(rev)

1929

if not self._inline:

1929

if not self._inline:

1930

transaction.add(self.datafile, end)

1930

transaction.add(self.datafile, end)

1931

end = rev * self._io.size

1931

end = rev * self._io.size

1932

else:

1932

else:

1933

end += rev * self._io.size

1933

end += rev * self._io.size

1934

1935

transaction.add(self.indexfile, end)

1935

transaction.add(self.indexfile, end)

1936

1937

# then reset internal state in memory to forget those revisions

1937

# then reset internal state in memory to forget those revisions

1938

self._cache = None

1938

self._cache = None

1939

self._chaininfocache = {}

1939

self._chaininfocache = {}

1940

self._chunkclear()

1940

self._chunkclear()

1941

for x in xrange(rev, len(self)):

1941

for x in xrange(rev, len(self)):

1942

del self.nodemap[self.node(x)]

1942

del self.nodemap[self.node(x)]

1943

1944

del self.index[rev:-1]

1944

del self.index[rev:-1]

1945

1946

def checksize(self):

1946

def checksize(self):

1947

expected = 0

1947

expected = 0

1948

if len(self):

1948

if len(self):

1949

expected = max(0, self.end(len(self) - 1))

1949

expected = max(0, self.end(len(self) - 1))

1950

1951

try:

1951

try:

1952

f = self.opener(self.datafile)

1952

f = self.opener(self.datafile)

1953

f.seek(0, 2)

1953

f.seek(0, 2)

1954

actual = f.tell()

1954

actual = f.tell()

1955

f.close()

1955

f.close()

1956

dd = actual - expected

1956

dd = actual - expected

1957

except IOError as inst:

1957

except IOError as inst:

1958

if inst.errno != errno.ENOENT:

1958

if inst.errno != errno.ENOENT:

1959

raise

1959

raise

1960

dd = 0

1960

dd = 0

1961

1962

try:

1962

try:

1963

f = self.opener(self.indexfile)

1963

f = self.opener(self.indexfile)

1964

f.seek(0, 2)

1964

f.seek(0, 2)

1965

actual = f.tell()

1965

actual = f.tell()

1966

f.close()

1966

f.close()

1967

s = self._io.size

1967

s = self._io.size

1968

i = max(0, actual // s)

1968

i = max(0, actual // s)

1969

di = actual - (i * s)

1969

di = actual - (i * s)

1970

if self._inline:

1970

if self._inline:

1971

databytes = 0

1971

databytes = 0

1972

for r in self:

1972

for r in self:

1973

databytes += max(0, self.length(r))

1973

databytes += max(0, self.length(r))

1974

dd = 0

1974

dd = 0

1975

di = actual - len(self) * s - databytes

1975

di = actual - len(self) * s - databytes

1976

except IOError as inst:

1976

except IOError as inst:

1977

if inst.errno != errno.ENOENT:

1977

if inst.errno != errno.ENOENT:

1978

raise

1978

raise

1979

di = 0

1979

di = 0

1980

1981

return (dd, di)

1981

return (dd, di)

1982

1983

def files(self):

1983

def files(self):

1984

res = [self.indexfile]

1984

res = [self.indexfile]

1985

if not self._inline:

1985

if not self._inline:

1986

res.append(self.datafile)

1986

res.append(self.datafile)

1987

return res

1987

return res

1988

1989

DELTAREUSEALWAYS = 'always'

1989

DELTAREUSEALWAYS = 'always'

1990

DELTAREUSESAMEREVS = 'samerevs'

1990

DELTAREUSESAMEREVS = 'samerevs'

1991

DELTAREUSENEVER = 'never'

1991

DELTAREUSENEVER = 'never'

1992

1993

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

1993

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

1994

1995

def clone(self, tr, destrevlog, addrevisioncb=None,

1995

def clone(self, tr, destrevlog, addrevisioncb=None,

1996

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

1996

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

1997

"""Copy this revlog to another, possibly with format changes.

1997

"""Copy this revlog to another, possibly with format changes.

1998

1999

The destination revlog will contain the same revisions and nodes.

1999

The destination revlog will contain the same revisions and nodes.

2000

However, it may not be bit-for-bit identical due to e.g. delta encoding

2000

However, it may not be bit-for-bit identical due to e.g. delta encoding

2001

differences.

2001

differences.

2002

2003

The ``deltareuse`` argument control how deltas from the existing revlog

2003

The ``deltareuse`` argument control how deltas from the existing revlog

2004

are preserved in the destination revlog. The argument can have the

2004

are preserved in the destination revlog. The argument can have the

2005

following values:

2005

following values:

2006

2007

DELTAREUSEALWAYS

2007

DELTAREUSEALWAYS

2008

Deltas will always be reused (if possible), even if the destination

2008

Deltas will always be reused (if possible), even if the destination

2009

revlog would not select the same revisions for the delta. This is the

2009

revlog would not select the same revisions for the delta. This is the

2010

fastest mode of operation.

2010

fastest mode of operation.

2011

DELTAREUSESAMEREVS

2011

DELTAREUSESAMEREVS

2012

Deltas will be reused if the destination revlog would pick the same

2012

Deltas will be reused if the destination revlog would pick the same

2013

revisions for the delta. This mode strikes a balance between speed

2013

revisions for the delta. This mode strikes a balance between speed

2014

and optimization.

2014

and optimization.

2015

DELTAREUSENEVER

2015

DELTAREUSENEVER

2016

Deltas will never be reused. This is the slowest mode of execution.

2016

Deltas will never be reused. This is the slowest mode of execution.

2017

This mode can be used to recompute deltas (e.g. if the diff/delta

2017

This mode can be used to recompute deltas (e.g. if the diff/delta

2018

algorithm changes).

2018

algorithm changes).

2019

2020

Delta computation can be slow, so the choice of delta reuse policy can

2020

Delta computation can be slow, so the choice of delta reuse policy can

2021

significantly affect run time.

2021

significantly affect run time.

2022

2023

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2023

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2024

two extremes. Deltas will be reused if they are appropriate. But if the

2024

two extremes. Deltas will be reused if they are appropriate. But if the

2025

delta could choose a better revision, it will do so. This means if you

2025

delta could choose a better revision, it will do so. This means if you

2026

are converting a non-generaldelta revlog to a generaldelta revlog,

2026

are converting a non-generaldelta revlog to a generaldelta revlog,

2027

deltas will be recomputed if the delta's parent isn't a parent of the

2027

deltas will be recomputed if the delta's parent isn't a parent of the

2028

revision.

2028

revision.

2029

2030

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2030

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2031

controls whether to compute deltas against both parents for merges.

2031

controls whether to compute deltas against both parents for merges.

2032

By default, the current default is used.

2032

By default, the current default is used.

2033

"""

2033

"""

2034

if deltareuse not in self.DELTAREUSEALL:

2034

if deltareuse not in self.DELTAREUSEALL:

2035

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2035

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2036

2037

if len(destrevlog):

2037

if len(destrevlog):

2038

raise ValueError(_('destination revlog is not empty'))

2038

raise ValueError(_('destination revlog is not empty'))

2039

2040

if getattr(self, 'filteredrevs', None):

2040

if getattr(self, 'filteredrevs', None):

2041

raise ValueError(_('source revlog has filtered revisions'))

2041

raise ValueError(_('source revlog has filtered revisions'))

2042

if getattr(destrevlog, 'filteredrevs', None):

2042

if getattr(destrevlog, 'filteredrevs', None):

2043

raise ValueError(_('destination revlog has filtered revisions'))

2043

raise ValueError(_('destination revlog has filtered revisions'))

2044

2045

# lazydeltabase controls whether to reuse a cached delta, if possible.

2045

# lazydeltabase controls whether to reuse a cached delta, if possible.

2046

oldlazydeltabase = destrevlog._lazydeltabase

2046

oldlazydeltabase = destrevlog._lazydeltabase

2047

oldamd = destrevlog._aggressivemergedeltas

2047

oldamd = destrevlog._aggressivemergedeltas

2048

2049

try:

2049

try:

2050

if deltareuse == self.DELTAREUSEALWAYS:

2050

if deltareuse == self.DELTAREUSEALWAYS:

2051

destrevlog._lazydeltabase = True

2051

destrevlog._lazydeltabase = True

2052

elif deltareuse == self.DELTAREUSESAMEREVS:

2052

elif deltareuse == self.DELTAREUSESAMEREVS:

2053

destrevlog._lazydeltabase = False

2053

destrevlog._lazydeltabase = False

2054

2055

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2055

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2056

2057

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2057

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2058

self.DELTAREUSESAMEREVS)

2058

self.DELTAREUSESAMEREVS)

2059

2060

index = self.index

2060

index = self.index

2061

for rev in self:

2061

for rev in self:

2062

entry = index[rev]

2062

entry = index[rev]

2063

2064

# Some classes override linkrev to take filtered revs into

2064

# Some classes override linkrev to take filtered revs into

2065

# account. Use raw entry from index.

2065

# account. Use raw entry from index.

2066

flags = entry[0] & 0xffff

2066

flags = entry[0] & 0xffff

2067

linkrev = entry[4]

2067

linkrev = entry[4]

2068

p1 = index[entry[5]][7]

2068

p1 = index[entry[5]][7]

2069

p2 = index[entry[6]][7]

2069

p2 = index[entry[6]][7]

2070

node = entry[7]

2070

node = entry[7]

2071

2072

# (Possibly) reuse the delta from the revlog if allowed and

2072

# (Possibly) reuse the delta from the revlog if allowed and

2073

# the revlog chunk is a delta.

2073

# the revlog chunk is a delta.

2074

cachedelta = None

2074

cachedelta = None

2075

text = None

2075

text = None

2076

if populatecachedelta:

2076

if populatecachedelta:

2077

dp = self.deltaparent(rev)

2077

dp = self.deltaparent(rev)

2078

if dp != nullrev:

2078

if dp != nullrev:

2079

cachedelta = (dp, str(self._chunk(rev)))

2079

cachedelta = (dp, str(self._chunk(rev)))

2080

2081

if not cachedelta:

2081

if not cachedelta:

2082

text = self.revision(rev)

2082

text = self.revision(rev)

2083

2084

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2084

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2085

checkambig=False)

2085

checkambig=False)

2086

dfh = None

2086

dfh = None

2087

if not destrevlog._inline:

2087

if not destrevlog._inline:

2088

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2088

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2089

try:

2089

try:

2090

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2090

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2091

flags, cachedelta, ifh, dfh)

2091

flags, cachedelta, ifh, dfh)

2092

finally:

2092

finally:

2093

if dfh:

2093

if dfh:

2094

dfh.close()

2094

dfh.close()

2095

ifh.close()

2095

ifh.close()

2096

2097

if addrevisioncb:

2097

if addrevisioncb:

2098

addrevisioncb(self, rev, node)

2098

addrevisioncb(self, rev, node)

2099

finally:

2099

finally:

2100

destrevlog._lazydeltabase = oldlazydeltabase

2100

destrevlog._lazydeltabase = oldlazydeltabase

2101

destrevlog._aggressivemergedeltas = oldamd

2101

destrevlog._aggressivemergedeltas = oldamd

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import collections
             import errno
             import hashlib
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullid,
                 nullrev,
             )
             from .i18n import _
             from . import (
                 ancestor,
                 error,
                 mdiff,
                 parsers,
                 templatefilters,
                 util,
             )
             _pack = struct.pack
             _unpack = struct.unpack
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # revlog header flags
             REVLOGV0 = 0
             REVLOGNG = 1
             REVLOGNGINLINEDATA = (1 << 16)
             REVLOGGENERALDELTA = (1 << 17)
             REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
             REVLOG_DEFAULT_FORMAT = REVLOGNG
             REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
             REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
             # revlog index flags
             REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
             REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
             REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
             REVIDX_DEFAULT_FLAGS = 0
             # stable order in which flags need to be processed and their processors applied
             REVIDX_FLAGS_ORDER = [
                 REVIDX_ISCENSORED,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
             ]
             REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
             # max size of revlog with inline data
             _maxinline = 131072
             _chunksize = 1048576
             RevlogError = error.RevlogError
             LookupError = error.LookupError
             CensoredNodeError = error.CensoredNodeError
             ProgrammingError = error.ProgrammingError
             # Store flag processors (cf. 'addflagprocessor()' to register)
             _flagprocessors = {
                 REVIDX_ISCENSORED: None,
             }
             def addflagprocessor(flag, processor):
                 """Register a flag processor on a revision data flag.
                 Invariant:
                 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
                 - Only one flag processor can be registered on a specific flag.
                 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
                   following signatures:
                       - (read)  f(self, text) -> newtext, bool
                       - (write) f(self, text) -> newtext, bool
                       - (raw)   f(self, text) -> bool
                   The boolean returned by these transforms is used to determine whether
                   'newtext' can be used for hash integrity checking.
                   Note: The 'raw' transform is used for changegroup generation and in some
                   debug commands. In this case the transform only indicates whether the
                   contents can be used for hash integrity checks.
                 """
                 if not flag & REVIDX_KNOWN_FLAGS:
                     msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
                     raise ProgrammingError(msg)
                 if flag not in REVIDX_FLAGS_ORDER:
                     msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
                     raise ProgrammingError(msg)
                 if flag in _flagprocessors:
                     msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
                     raise error.Abort(msg)
                 _flagprocessors[flag] = processor
             def getoffset(q):
                 return int(q >> 16)
             def gettype(q):
                 return int(q & 0xFFFF)
             def offset_type(offset, type):
                 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
                     raise ValueError('unknown revlog index flags')
                 return long(long(offset) << 16 | type)
             _nullhash = hashlib.sha1(nullid)
             def hash(text, p1, p2):
                 """generate a hash from the given text and its parent hashes
                 This hash combines both the current file contents and its history
                 in a manner that makes it easy to distinguish nodes with the same
                 content in the revision graph.
                 """
                 # As of now, if one of the parent node is null, p2 is null
                 if p2 == nullid:
                     # deep copy of a hash is faster than creating one
                     s = _nullhash.copy()
                     s.update(p1)
                 else:
                     # none of the parent nodes are nullid
                     l = [p1, p2]
                     l.sort()
                     s = hashlib.sha1(l[0])
                     s.update(l[1])
                 s.update(text)
                 return s.digest()
             # index v0:
             #  4 bytes: offset
             #  4 bytes: compressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             # 20 bytes: parent 1 nodeid
             # 20 bytes: parent 2 nodeid
             # 20 bytes: nodeid
             indexformatv0 = ">4l20s20s20s"
             class revlogoldio(object):
                 def __init__(self):
                     self.size = struct.calcsize(indexformatv0)
                 def parseindex(self, data, inline):
                     s = self.size
                     index = []
                     nodemap = {nullid: nullrev}
                     n = off = 0
                     l = len(data)
                     while off + s <= l:
                         cur = data[off:off + s]
                         off += s
                         e = _unpack(indexformatv0, cur)
                         # transform to revlogv1 format
                         e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                               nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
                         index.append(e2)
                         nodemap[e[6]] = n
                         n += 1
                     # add the magic null revision at -1
                     index.append((0, 0, 0, -1, -1, -1, -1, nullid))
                     return index, nodemap, None
                 def packentry(self, entry, node, version, rev):
                     if gettype(entry[0]):
                         raise RevlogError(_("index entry flags need RevlogNG"))
                     e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                           node(entry[5]), node(entry[6]), entry[7])
                     return _pack(indexformatv0, *e2)
             # index ng:
             #  6 bytes: offset
             #  2 bytes: flags
             #  4 bytes: compressed length
             #  4 bytes: uncompressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             #  4 bytes: parent 1 rev
             #  4 bytes: parent 2 rev
             # 32 bytes: nodeid
             indexformatng = ">Qiiiiii20s12x"
             versionformat = ">I"
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7fffffff
             class revlogio(object):
                 def __init__(self):
                     self.size = struct.calcsize(indexformatng)
                 def parseindex(self, data, inline):
                     # call the C implementation to parse the index data
                     index, cache = parsers.parse_index2(data, inline)
                     return index, getattr(index, 'nodemap', None), cache
                 def packentry(self, entry, node, version, rev):
                     p = _pack(indexformatng, *entry)
                     if rev == 0:
                         p = _pack(versionformat, version) + p[4:]
                     return p
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 """
                 def __init__(self, opener, indexfile, checkambig=False):
                     """
                     create a revlog object
                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.
                     """
                     self.indexfile = indexfile
                     self.datafile = indexfile[:-2] + ".d"
                     self.opener = opener
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._cache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, '')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._aggressivemergedeltas = False
                     self.index = []
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Mapping of revision integer to full node.
                     self._nodecache = {nullid: nullrev}
                     self._nodepos = None
                     self._compengine = 'zlib'
                     v = REVLOG_DEFAULT_VERSION
                     opts = getattr(opener, 'options', None)
                     if opts is not None:
                         if 'revlogv1' in opts:
                             if 'generaldelta' in opts:
                                 v |= REVLOGGENERALDELTA
                         else:
                             v = 0
                         if 'chunkcachesize' in opts:
                             self._chunkcachesize = opts['chunkcachesize']
                         if 'maxchainlen' in opts:
                             self._maxchainlen = opts['maxchainlen']
                         if 'aggressivemergedeltas' in opts:
                             self._aggressivemergedeltas = opts['aggressivemergedeltas']
                         self._lazydeltabase = bool(opts.get('lazydeltabase', False))
                         if 'compengine' in opts:
                             self._compengine = opts['compengine']
                     if self._chunkcachesize <= 0:
                         raise RevlogError(_('revlog chunk cache size %r is not greater '
                                             'than 0') % self._chunkcachesize)
                     elif self._chunkcachesize & (self._chunkcachesize - 1):
                         raise RevlogError(_('revlog chunk cache size %r is not a power '
                                             'of 2') % self._chunkcachesize)
                     indexdata = ''
                     self._initempty = True
                     try:
                         f = self.opener(self.indexfile)
                         indexdata = f.read()
                         f.close()
                         if len(indexdata) > 0:
                             v = struct.unpack(versionformat, indexdata[:4])[0]
                             self._initempty = False
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                     self.version = v
                     self._inline = v & REVLOGNGINLINEDATA
                     self._generaldelta = v & REVLOGGENERALDELTA
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0 and flags:
                         raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                           % (self.indexfile, flags >> 16))
                     elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
                         raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                           % (self.indexfile, flags >> 16))
                     elif fmt > REVLOGNG:
                         raise RevlogError(_("index %s unknown format %d")
                                           % (self.indexfile, fmt))
                     self.storedeltachains = True
                     self._io = revlogio()
                     if self.version == REVLOGV0:
                         self._io = revlogoldio()
                     try:
                         d = self._io.parseindex(indexdata, self._inline)
                     except (ValueError, IndexError):
                         raise RevlogError(_("index %s is corrupted") % (self.indexfile))
                     self.index, nodemap, self._chunkcache = d
                     if nodemap is not None:
                         self.nodemap = self._nodecache = nodemap
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = {}
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def _compressor(self):
                     return util.compengines[self._compengine].revlogcompressor()
                 def tip(self):
                     return self.node(len(self.index) - 2)
                 def __contains__(self, rev):
                     return 0 <= rev < len(self)
                 def __len__(self):
                     return len(self.index) - 1
                 def __iter__(self):
                     return iter(xrange(len(self)))
                 def revs(self, start=0, stop=None):
                     """iterate over all rev in this revlog (from start to stop)"""
                     step = 1
                     if stop is not None:
                         if start > stop:
                             step = -1
                         stop += step
                     else:
                         stop = len(self)
                     return xrange(start, stop, step)
                 @util.propertycache
                 def nodemap(self):
                     self.rev(self.node(0))
                     return self._nodecache
                 def hasnode(self, node):
                     try:
                         self.rev(node)
                         return True
                     except KeyError:
                         return False
                 def clearcaches(self):
                     self._cache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, '')
                     self._pcache = {}
                     try:
                         self._nodecache.clearcaches()
                     except AttributeError:
                         self._nodecache = {nullid: nullrev}
                         self._nodepos = None
                 def rev(self, node):
                     try:
                         return self._nodecache[node]
                     except TypeError:
                         raise
                     except RevlogError:
                         # parsers.c radix tree lookup failed
                         raise LookupError(node, self.indexfile, _('no node'))
                     except KeyError:
                         # pure python cache lookup failed
                         n = self._nodecache
                         i = self.index
                         p = self._nodepos
                         if p is None:
                             p = len(i) - 2
                         for r in xrange(p, -1, -1):
                             v = i[r][7]
                             n[v] = r
                             if v == node:
                                 self._nodepos = r - 1
                                 return r
                         raise LookupError(node, self.indexfile, _('no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     return int(self.index[rev][0] >> 16)
                 def flags(self, rev):
                     return self.index[rev][0] & 0xFFFF
                 def length(self, rev):
                     return self.index[rev][1]
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     l = self.index[rev][2]
                     if l >= 0:
                         return l
                     t = self.revision(self.node(rev))
                     return len(t)
                 size = rawsize
                 def chainbase(self, rev):
                     base = self._chainbasecache.get(rev)
                     if base is not None:
                         return base
                     index = self.index
                     base = index[rev][3]
                     while base != rev:
                         rev = base
                         base = index[rev][3]
                     self._chainbasecache[rev] = base
                     return base
                 def linkrev(self, rev):
                     return self.index[rev][4]
                 def parentrevs(self, rev):
                     return self.index[rev][5:7]
                 def node(self, rev):
                     return self.index[rev][7]
                 # Derived from index values.
                 def end(self, rev):
                     return self.start(rev) + self.length(rev)
                 def parents(self, node):
                     i = self.index
                     d = i[self.rev(node)]
                     return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
                 def chainlen(self, rev):
                     return self._chaininfo(rev)[0]
                 def _chaininfo(self, rev):
                     chaininfocache = self._chaininfocache
                     if rev in chaininfocache:
                         return chaininfocache[rev]
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     clen = 0
                     compresseddeltalen = 0
                     while iterrev != e[3]:
                         clen += 1
                         compresseddeltalen += e[1]
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         if iterrev in chaininfocache:
                             t = chaininfocache[iterrev]
                             clen += t[0]
                             compresseddeltalen += t[1]
                             break
                         e = index[iterrev]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         compresseddeltalen += e[1]
                     r = (clen, compresseddeltalen)
                     chaininfocache[rev] = r
                     return r
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.
                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.
                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     chain = []
                     # Alias to prevent attribute lookup in tight loop.
                     index = self.index
                     generaldelta = self._generaldelta
                     iterrev = rev
                     e = index[iterrev]
                     while iterrev != e[3] and iterrev != stoprev:
                         chain.append(iterrev)
                         if generaldelta:
                             iterrev = e[3]
                         else:
                             iterrev -= 1
                         e = index[iterrev]
                     if iterrev == stoprev:
                         stopped = True
                     else:
                         chain.append(iterrev)
                         stopped = False
                     chain.reverse()
                     return chain, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse topological order.
                     Does not generate revs lower than stoprev.
                     See the documentation for ancestor.lazyancestors for more details."""
                     return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                                   inclusive=inclusive)
                 def descendants(self, revs):
                     """Generate the descendants of 'revs' in revision order.
                     Yield a sequence of revision numbers starting with a child of
                     some rev in revs, i.e., each revision is *not* considered a
                     descendant of itself.  Results are ordered by revision number (a
                     topological sort)."""
                     first = min(revs)
                     if first == nullrev:
                         for i in self:
                             yield i
                         return
                     seen = set(revs)
                     for i in self.revs(start=first + 1):
                         for x in self.parentrevs(i):
                             if x != nullrev and x in seen:
                                 seen.add(i)
                                 yield i
                                 break
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:
                       ::common, (::heads) - (::common)
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues
                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues
                         def __iter__(self):
                             added = self.addedvalues
                             for r in added:
                                 yield r
                             for r in self.lazyvalues:
                                 if not r in added:
                                     yield r
                         def add(self, value):
                             self.addedvalues.add(value)
                         def update(self, values):
                             self.addedvalues.update(values)
                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     visit = collections.deque(r for r in heads if r not in has)
                     while visit:
                         r = visit.popleft()
                         if r in missing:
                             continue
                         else:
                             missing.add(r)
                             for p in self.parentrevs(r):
                                 if p not in has:
                                     visit.append(p)
                     missing = list(missing)
                     missing.sort()
                     return has, [self.node(miss) for miss in missing]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object.
                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     if common is None:
                         common = [nullrev]
                     return ancestor.incrementalmissingancestors(self.parentrevs, common)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.
                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullrev]
                     if heads is None:
                         heads = self.headrevs()
                     inc = self.incrementalmissingrevs(common=common)
                     return inc.missingancestors(heads)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.
                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:
 . N is an ancestor of some node in 'heads'
 . N is not an ancestor of any node in 'common'
                     The list is sorted by revision number, meaning it is
                     topologically sorted.
                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     common = [self.rev(n) for n in common]
                     heads = [self.rev(n) for n in heads]
                     inc = self.incrementalmissingrevs(common=common)
                     return [self.node(r) for r in inc.missingancestors(heads)]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.
                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:
 . N is a descendant of some node in 'roots'
 . N is an ancestor of some node in 'heads'
                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.
                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.
                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in self], [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n) # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # will start being marked is descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the dependents list with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     heads = [head for head, flag in heads.iteritems() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self):
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         return self._headrevs()
                 def computephases(self, roots):
                     return self.index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """return the list of all nodes that have no children
                     if start is specified, only heads that are descendants of
                     start will be returned
                     if stop is specified, it will consider all the revs from stop
                     as if they had no children
                     """
                     if start is None and stop is None:
                         if not len(self):
                             return [nullid]
                         return [self.node(r) for r in self.headrevs()]
                     if start is None:
                         start = nullid
                     if stop is None:
                         stop = []
                     stoprevs = set([self.rev(n) for n in stop])
                     startrev = self.rev(start)
                     reachable = set((startrev,))
                     heads = set((startrev,))
                     parentrevs = self.parentrevs
                     for r in self.revs(start=startrev + 1):
                         for p in parentrevs(r):
                             if p in reachable:
                                 if r not in stoprevs:
                                     reachable.add(r)
                                 heads.add(r)
                             if p in heads and p not in stoprevs:
                                 heads.remove(p)
                     return [self.node(r) for r in heads]
                 def children(self, node):
                     """find the children of a given node"""
                     c = []
                     p = self.rev(node)
                     for r in self.revs(start=p + 1):
                         prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
                         if prevs:
                             for pr in prevs:
                                 if pr == p:
                                     c.append(self.node(r))
                         elif p == nullrev:
                             c.append(self.node(r))
                     return c
                 def descendant(self, start, end):
                     if start == nullrev:
                         return True
                     for i in self.descendants([start]):
                         if i == end:
                             return True
                         elif i > end:
                             break
                     return False
                 def commonancestorsheads(self, a, b):
                     """calculate all the heads of the common ancestors of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.commonancestorsheads(a, b)
                     except (AttributeError, OverflowError): # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
                     return map(self.node, ancs)
                 def isancestor(self, a, b):
                     """return True if node a is an ancestor of node b
                     The implementation of this is trivial but the use of
                     commonancestorsheads is not."""
                     return a in self.commonancestorsheads(a, b)
                 def ancestor(self, a, b):
                     """calculate the "best" common ancestor of nodes a and b"""
                     a, b = self.rev(a), self.rev(b)
                     try:
                         ancs = self.index.ancestors(a, b)
                     except (AttributeError, OverflowError):
                         ancs = ancestor.ancestors(self.parentrevs, a, b)
                     if ancs:
                         # choose a consistent winner when there's a tie
                         return min(map(self.node, ancs))
                     return nullid
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == 20:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node) # quick search the index
                             return node
                         except LookupError:
                             pass # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if str(rev) != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 40:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, LookupError):
                             pass
                 def _partialmatch(self, id):
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             return partial
                         return None
                     except RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             raise LookupError(id, self.indexfile,
                                               _('ambiguous identifier'))
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) < 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[:l * 2])
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             nl = [n for n in nl if hex(n).startswith(id) and
                                   self.hasnode(n)]
                             if len(nl) > 0:
                                 if len(nl) == 1:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise LookupError(id, self.indexfile,
                                                   _('ambiguous identifier'))
                             return None
                         except TypeError:
                             pass
                 def lookup(self, id):
                     """locate a node based on:
                         - revision number or str(revision number)
                         - nodeid or subset of hex nodeid
                     """
                     n = self._match(id)
                     if n is not None:
                         return n
                     n = self._partialmatch(id)
                     if n:
                         return n
                     raise LookupError(id, self.indexfile, _('no match found'))
                 def cmp(self, node, text):
                     """compare text with a given file revision
                     returns True if text is different than what is stored.
                     """
                     p1, p2 = self.parents(node)
                     return hash(text, p1, p2) != node
                 def _addchunk(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _loadchunk(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     """
                     if df is not None:
                         closehandle = False
                     else:
                         if self._inline:
                             df = self.opener(self.indexfile)
                         else:
                             df = self.opener(self.datafile)
                         closehandle = True
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                                   - realoffset)
                     df.seek(realoffset)
                     d = df.read(reallength)
                     if closehandle:
                         df.close()
                     self._addchunk(realoffset, d)
                     if offset != realoffset or reallength != length:
                         return util.buffer(d, offset - realoffset, length)
                     return d
                 def _getchunk(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._loadchunk(offset, length, df=df)
                 def _chunkraw(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self._io.size
                         end += (endrev + 1) * self._io.size
                     length = end - start
                     return start, self._getchunk(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     """
                     return self.decompress(self._chunkraw(rev, rev, df=df)[1])
                 def _chunks(self, revs, df=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     """
                     if not revs:
                         return []
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self._io.size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     try:
                         offset, data = self._chunkraw(revs[0], revs[-1], df=df)
                     except OverflowError:
                         # issue4215 - we can't cache a run of chunks greater than
                         # 2G on Windows
                         return [self._chunk(rev, df=df) for rev in revs]
                     decomp = self.decompress
                     for rev in revs:
                         chunkstart = start(rev)
                         if inline:
                             chunkstart += (rev + 1) * iosize
                         chunklength = length(rev)
                         ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, '')
                 def deltaparent(self, rev):
                     """return deltaparent of the given revision"""
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     elif self._generaldelta:
                         return base
                     else:
                         return rev - 1
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions"""
                     if rev1 != nullrev and self.deltaparent(rev2) == rev1:
                         return str(self._chunk(rev2))
                     return mdiff.textdiff(self.revision(rev1),
                                           self.revision(rev2))
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return an uncompressed revision of a given node or revision
                     number.
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.
                     """
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     cachedrev = None
                     if node == nullid:
                         return ""
                     if self._cache:
                         if self._cache[0] == node:
                             return self._cache[2]
                         cachedrev = self._cache[1]
                     # look up what we need to read
                     text = None
                     if rev is None:
                         rev = self.rev(node)
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         text = self._cache[2]
                     # drop cache to save memory
                     self._cache = None
                     bins = self._chunks(chain, df=_df)
                     if text is None:
-                        text = str(bins[0])
+                        text = bytes(bins[0])
                         bins = bins[1:]
                     text = mdiff.patches(text, bins)
                     text, validatehash = self._processflags(text, self.flags(rev), 'read',
                                                             raw=raw)
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     self._cache = (node, rev, text)
                     return text
                 def hash(self, text, p1, p2):
                     """Compute a node hash.
                     Available as a function so that subclasses can replace the hash
                     as needed.
                     """
                     return hash(text, p1, p2)
                 def _processflags(self, text, flags, operation, raw=False):
                     """Inspect revision data flags and applies transforms defined by
                     registered flag processors.
                     ``text`` - the revision data to process
                     ``flags`` - the revision flags
                     ``operation`` - the operation being performed (read or write)
                     ``raw`` - an optional argument describing if the raw transform should be
                     applied.
                     This method processes the flags in the order (or reverse order if
                     ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
                     flag processors registered for present flags. The order of flags defined
                     in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
                     Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                     processed text and ``validatehash`` is a bool indicating whether the
                     returned text should be checked for hash integrity.
                     Note: If the ``raw`` argument is set, it has precedence over the
                     operation and will only update the value of ``validatehash``.
                     """
                     if not operation in ('read', 'write'):
                         raise ProgrammingError(_("invalid '%s' operation ") % (operation))
                     # Check all flags are known.
                     if flags & ~REVIDX_KNOWN_FLAGS:
                         raise RevlogError(_("incompatible revision flag '%#x'") %
                                           (flags & ~REVIDX_KNOWN_FLAGS))
                     validatehash = True
                     # Depending on the operation (read or write), the order might be
                     # reversed due to non-commutative transforms.
                     orderedflags = REVIDX_FLAGS_ORDER
                     if operation == 'write':
                         orderedflags = reversed(orderedflags)
                     for flag in orderedflags:
                         # If a flagprocessor has been registered for a known flag, apply the
                         # related operation transform and update result tuple.
                         if flag & flags:
                             vhash = True
                             if flag not in _flagprocessors:
                                 message = _("missing processor for flag '%#x'") % (flag)
                                 raise RevlogError(message)
                             processor = _flagprocessors[flag]
                             if processor is not None:
                                 readtransform, writetransform, rawtransform = processor
                                 if raw:
                                     vhash = rawtransform(self, text)
                                 elif operation == 'read':
                                     text, vhash = readtransform(self, text)
                                 else: # write operation
                                     text, vhash = writetransform(self, text)
                             validatehash = validatehash and vhash
                     return text, validatehash
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Check node hash integrity.
                     Available as a function so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     if p1 is None and p2 is None:
                         p1, p2 = self.parents(node)
                     if node != self.hash(text, p1, p2):
                         revornode = rev
                         if revornode is None:
                             revornode = templatefilters.short(hex(node))
                         raise RevlogError(_("integrity check failed on %s:%s")
                             % (self.indexfile, revornode))
                 def checkinlinesize(self, tr, fp=None):
                     """Check if the revlog is too big for inline and convert if so.
                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.
                     """
                     if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         raise RevlogError(_("%s not found in the transaction")
                                           % self.indexfile)
                     trindex = trinfo[2]
                     if trindex is not None:
                         dataoff = self.start(trindex)
                     else:
                         # revlog was stripped at start of transaction, use all leftover data
                         trindex = len(self) - 1
                         dataoff = self.end(-2)
                     tr.add(self.datafile, dataoff)
                     if fp:
                         fp.flush()
                         fp.close()
                     df = self.opener(self.datafile, 'w')
                     try:
                         for r in self:
                             df.write(self._chunkraw(r, r)[1])
                     finally:
                         df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True,
                                      checkambig=self._checkambig)
                     self.version &= ~(REVLOGNGINLINEDATA)
                     self._inline = False
                     for i in self:
                         e = self._io.packentry(self.index[i], self.node, self.version, i)
                         fp.write(e)
                     # if we don't call close, the temp file will never replace the
                     # real index
                     fp.close()
                     tr.replace(self.indexfile, trindex * self._io.size)
                     self._chunkclear()
                 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                                 node=None, flags=REVIDX_DEFAULT_FLAGS):
                     """add a revision to the log
                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     """
                     if link == nullrev:
                         raise RevlogError(_("attempted to add linkrev -1 to %s")
                                           % self.indexfile)
                     if flags:
                         node = node or self.hash(text, p1, p2)
                     newtext, validatehash = self._processflags(text, flags, 'write')
                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if newtext != text:
                         cachedelta = None
                     text = newtext
                     if len(text) > _maxentrysize:
                         raise RevlogError(
                             _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                             % (self.indexfile, len(text)))
                     node = node or self.hash(text, p1, p2)
                     if node in self.nodemap:
                         return node
                     if validatehash:
                         self.checkhash(text, node, p1=p1, p2=p2)
                     dfh = None
                     if not self._inline:
                         dfh = self.opener(self.datafile, "a+")
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     try:
                         return self._addrevision(node, text, transaction, link, p1, p2,
                                                  flags, cachedelta, ifh, dfh)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data."""
                     if not data:
                         return '', data
                     compressed = self._compressor.compress(data)
                     if compressed:
                         # The revlog compressor added the header in the returned data.
                         return '', compressed
                     if data[0] == '\0':
                         return '', data
                     return 'u', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.
                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     """
                     if not data:
                         return data
                     # Revlogs are read much more frequently than they are written and many
                     # chunks only take microseconds to decompress, so performance is
                     # important here.
                     #
                     # We can make a few assumptions about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to inline
                     #    raw data).
                     # 2) decompressing *any* data will likely by at least 10x slower than
                     #    returning raw inline data.
                     # 3) we want to prioritize common and officially supported compression
                     #    engines
                     #
                     # It follows that we want to optimize for "decompress compressed data
                     # when encoded with common and officially supported compression engines"
                     # case over "raw data" and "data encoded by less common or non-official
                     # compression engines." That is why we have the inline lookup first
                     # followed by the compengines lookup.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest reads.
                     t = data[0:1]
                     if t == 'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise RevlogError(_('revlog decompress error: %s') % str(e))
                     # '\0' is more common than 'u' so it goes first.
                     elif t == '\0':
                         return data
                     elif t == 'u':
                         return util.buffer(data, 1)
                     try:
                         compressor = self._decompressors[t]
                     except KeyError:
                         try:
                             engine = util.compengines.forrevlogheader(t)
                             compressor = engine.revlogcompressor()
                             self._decompressors[t] = compressor
                         except KeyError:
                             raise RevlogError(_('unknown compression type %r') % t)
                     return compressor.decompress(data)
                 def _isgooddelta(self, d, textlen):
                     """Returns True if the given delta is good. Good means that it is within
                     the disk span, disk size, and chain length bounds that we know to be
                     performant."""
                     if d is None:
                         return False
                     # - 'dist' is the distance from the base revision -- bounding it limits
                     #   the amount of I/O we need to do.
                     # - 'compresseddeltalen' is the sum of the total size of deltas we need
                     #   to apply -- bounding it limits the amount of CPU we consume.
                     dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
                     if (dist > textlen * 4 or l > textlen or
                         compresseddeltalen > textlen * 2 or
                         (self._maxchainlen and chainlen > self._maxchainlen)):
                         return False
                     return True
                 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                                  cachedelta, ifh, dfh, alwayscache=False, raw=False):
                     """internal function to add revisions to the log
                     see addrevision for argument descriptions.
                     invariants:
                     - text is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     - raw is optional; if set to True, it indicates the revision data is to
                       be treated by _processflags() as raw. It is usually set by changegroup
                       generation and debug commands.
                     """
                     btext = [text]
                     def buildtext():
                         if btext[0] is not None:
                             return btext[0]
                         baserev = cachedelta[0]
                         delta = cachedelta[1]
                         # special case deltas which replace entire base; no need to decode
                         # base revision. this neatly avoids censored bases, which throw when
                         # they're decoded.
                         hlen = struct.calcsize(">lll")
                         if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                                    len(delta) - hlen):
                             btext[0] = delta[hlen:]
                         else:
                             if self._inline:
                                 fh = ifh
                             else:
                                 fh = dfh
                             basetext = self.revision(self.node(baserev), _df=fh, raw=raw)
                             btext[0] = mdiff.patch(basetext, delta)
                         try:
                             res = self._processflags(btext[0], flags, 'read', raw=raw)
                             btext[0], validatehash = res
                             if validatehash:
                                 self.checkhash(btext[0], node, p1=p1, p2=p2)
                             if flags & REVIDX_ISCENSORED:
                                 raise RevlogError(_('node %s is not censored') % node)
                         except CensoredNodeError:
                             # must pass the censored index flag to add censored revisions
                             if not flags & REVIDX_ISCENSORED:
                                 raise
                         return btext[0]
                     def builddelta(rev):
                         # can we use the cached delta?
                         if cachedelta and cachedelta[0] == rev:
                             delta = cachedelta[1]
                         else:
                             t = buildtext()
                             if self.iscensored(rev):
                                 # deltas based on a censored revision must replace the
                                 # full content in one patch, so delta works everywhere
                                 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                                 delta = header + t
                             else:
                                 if self._inline:
                                     fh = ifh
                                 else:
                                     fh = dfh
                                 ptext = self.revision(self.node(rev), _df=fh)
                                 delta = mdiff.textdiff(ptext, t)
                         header, data = self.compress(delta)
                         deltalen = len(header) + len(data)
                         chainbase = self.chainbase(rev)
                         dist = deltalen + offset - self.start(chainbase)
                         if self._generaldelta:
                             base = rev
                         else:
                             base = chainbase
                         chainlen, compresseddeltalen = self._chaininfo(rev)
                         chainlen += 1
                         compresseddeltalen += deltalen
                         return (dist, deltalen, (header, data), base,
                                 chainbase, chainlen, compresseddeltalen)
                     curr = len(self)
                     prev = curr - 1
                     offset = self.end(prev)
                     delta = None
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if text is None:
                         textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                                     cachedelta[1])
                     else:
                         textlen = len(text)
                     # should we try to build a delta?
                     if prev != nullrev and self.storedeltachains:
                         tested = set()
                         # This condition is true most of the time when processing
                         # changegroup data into a generaldelta repo. The only time it
                         # isn't true is if this is the first revision in a delta chain
                         # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                         if cachedelta and self._generaldelta and self._lazydeltabase:
                             # Assume what we received from the server is a good choice
                             # build delta will reuse the cache
                             candidatedelta = builddelta(cachedelta[0])
                             tested.add(cachedelta[0])
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                         if delta is None and self._generaldelta:
                             # exclude already lazy tested base if any
                             parents = [p for p in (p1r, p2r)
                                        if p != nullrev and p not in tested]
                             if parents and not self._aggressivemergedeltas:
                                 # Pick whichever parent is closer to us (to minimize the
                                 # chance of having to build a fulltext).
                                 parents = [max(parents)]
                             tested.update(parents)
                             pdeltas = []
                             for p in parents:
                                 pd = builddelta(p)
                                 if self._isgooddelta(pd, textlen):
                                     pdeltas.append(pd)
                             if pdeltas:
                                 delta = min(pdeltas, key=lambda x: x[1])
                         if delta is None and prev not in tested:
                             # other approach failed try against prev to hopefully save us a
                             # fulltext.
                             candidatedelta = builddelta(prev)
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                     if delta is not None:
                         dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
                     else:
                         text = buildtext()
                         data = self.compress(text)
                         l = len(data[1]) + len(data[0])
                         base = chainbase = curr
                     e = (offset_type(offset, flags), l, textlen,
                          base, link, p1r, p2r, node)
                     self.index.insert(-1, e)
                     self.nodemap[node] = curr
                     entry = self._io.packentry(e, self.node, self.version, curr)
                     self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
                     if alwayscache and text is None:
                         text = buildtext()
                     if type(text) == str: # only accept immutable objects
                         self._cache = (node, curr, text)
                     self._chainbasecache[curr] = chainbase
                     return node
                 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3.
                     ifh.seek(0, os.SEEK_END)
                     if dfh:
                         dfh.seek(0, os.SEEK_END)
                     curr = len(self) - 1
                     if not self._inline:
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         ifh.write(entry)
                     else:
                         offset += curr * self._io.size
                         transaction.add(self.indexfile, offset, curr)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         self.checkinlinesize(transaction, ifh)
                 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
                     """
                     add a delta group
                     given a set of deltas, add them to the revision log. the
                     first delta is against its parent, which should be in our
                     log, the rest are against the previous delta.
                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the node that was added.
                     """
                     # track the base of the current delta log
                     content = []
                     node = None
                     r = len(self)
                     end = 0
                     if r:
                         end = self.end(r - 1)
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     isize = r * self._io.size
                     if self._inline:
                         transaction.add(self.indexfile, end + isize, r)
                         dfh = None
                     else:
                         transaction.add(self.indexfile, isize, r)
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a+")
                     def flush():
                         if dfh:
                             dfh.flush()
                         ifh.flush()
                     try:
                         # loop through our set of deltas
                         chain = None
                         for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                             node = chunkdata['node']
                             p1 = chunkdata['p1']
                             p2 = chunkdata['p2']
                             cs = chunkdata['cs']
                             deltabase = chunkdata['deltabase']
                             delta = chunkdata['delta']
                             flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
                             content.append(node)
                             link = linkmapper(cs)
                             if node in self.nodemap:
                                 # this can happen if two branches make the same change
                                 chain = node
                                 continue
                             for p in (p1, p2):
                                 if p not in self.nodemap:
                                     raise LookupError(p, self.indexfile,
                                                       _('unknown parent'))
                             if deltabase not in self.nodemap:
                                 raise LookupError(deltabase, self.indexfile,
                                                   _('unknown delta base'))
                             baserev = self.rev(deltabase)
                             if baserev != nullrev and self.iscensored(baserev):
                                 # if base is censored, delta must be full replacement in a
                                 # single patch operation
                                 hlen = struct.calcsize(">lll")
                                 oldlen = self.rawsize(baserev)
                                 newlen = len(delta) - hlen
                                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                                     raise error.CensoredBaseError(self.indexfile,
                                                                   self.node(baserev))
                             if not flags and self._peek_iscensored(baserev, delta, flush):
                                 flags |= REVIDX_ISCENSORED
                             # We assume consumers of addrevisioncb will want to retrieve
                             # the added revision, which will require a call to
                             # revision(). revision() will fast path if there is a cache
                             # hit. So, we tell _addrevision() to always cache in this case.
                             # We're only using addgroup() in the context of changegroup
                             # generation so the revision data can always be handled as raw
                             # by the flagprocessor.
                             chain = self._addrevision(node, None, transaction, link,
                                                       p1, p2, flags, (baserev, delta),
                                                       ifh, dfh,
                                                       alwayscache=bool(addrevisioncb),
                                                       raw=True)
                             if addrevisioncb:
                                 addrevisioncb(self, chain)
                             if not dfh and not self._inline:
                                 # addrevision switched from inline to conventional
                                 # reopen the index
                                 ifh.close()
                                 dfh = self.opener(self.datafile, "a+")
                                 ifh = self.opener(self.indexfile, "a+",
                                                   checkambig=self._checkambig)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                     return content
                 def iscensored(self, rev):
                     """Check if a file revision is censored."""
                     return False
                 def _peek_iscensored(self, baserev, delta, flush):
                     """Quickly check if a delta produces a censored revision."""
                     return False
                 def getstrippoint(self, minlink):
                     """find the minimum rev that must be stripped to strip the linkrev
                     Returns a tuple containing the minimum rev and a set of all revs that
                     have linkrevs that will be broken by this strip.
                     """
                     brokenrevs = set()
                     strippoint = len(self)
                     heads = {}
                     futurelargelinkrevs = set()
                     for head in self.headrevs():
                         headlinkrev = self.linkrev(head)
                         heads[head] = headlinkrev
                         if headlinkrev >= minlink:
                             futurelargelinkrevs.add(headlinkrev)
                     # This algorithm involves walking down the rev graph, starting at the
                     # heads. Since the revs are topologically sorted according to linkrev,
                     # once all head linkrevs are below the minlink, we know there are
                     # no more revs that could have a linkrev greater than minlink.
                     # So we can stop walking.
                     while futurelargelinkrevs:
                         strippoint -= 1
                         linkrev = heads.pop(strippoint)
                         if linkrev < minlink:
                             brokenrevs.add(strippoint)
                         else:
                             futurelargelinkrevs.remove(linkrev)
                         for p in self.parentrevs(strippoint):
                             if p != nullrev:
                                 plinkrev = self.linkrev(p)
                                 heads[p] = plinkrev
                                 if plinkrev >= minlink:
                                     futurelargelinkrevs.add(plinkrev)
                     return strippoint, brokenrevs
                 def strip(self, minlink, transaction):
                     """truncate the revlog on the first revision with a linkrev >= minlink
                     This function is called when we're stripping revision minlink and
                     its descendants from the repository.
                     We have to remove all revisions with linkrev >= minlink, because
                     the equivalent changelog revisions will be renumbered after the
                     strip.
                     So we truncate the revlog on the first of these revisions, and
                     trust that the caller has saved the revisions that shouldn't be
                     removed and that it'll re-add them after this truncation.
                     """
                     if len(self) == 0:
                         return
                     rev, _ = self.getstrippoint(minlink)
                     if rev == len(self):
                         return
                     # first truncate the files on disk
                     end = self.start(rev)
                     if not self._inline:
                         transaction.add(self.datafile, end)
                         end = rev * self._io.size
                     else:
                         end += rev * self._io.size
                     transaction.add(self.indexfile, end)
                     # then reset internal state in memory to forget those revisions
                     self._cache = None
                     self._chaininfocache = {}
                     self._chunkclear()
                     for x in xrange(rev, len(self)):
                         del self.nodemap[self.node(x)]
                     del self.index[rev:-1]
                 def checksize(self):
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         f = self.opener(self.datafile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         f.seek(0, 2)
                         actual = f.tell()
                         f.close()
                         s = self._io.size
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     res = [self.indexfile]
                     if not self._inline:
                         res.append(self.datafile)
                     return res
                 DELTAREUSEALWAYS = 'always'
                 DELTAREUSESAMEREVS = 'samerevs'
                 DELTAREUSENEVER = 'never'
                 DELTAREUSEALL = set(['always', 'samerevs', 'never'])
                 def clone(self, tr, destrevlog, addrevisioncb=None,
                           deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``aggressivemergedeltas`` argument
                     controls whether to compute deltas against both parents for merges.
                     By default, the current default is used.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
                     if len(destrevlog):
                         raise ValueError(_('destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_('source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_('destination revlog has filtered revisions'))
                     # lazydeltabase controls whether to reuse a cached delta, if possible.
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._aggressivemergedeltas
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                         destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
                         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                             self.DELTAREUSESAMEREVS)
                         index = self.index
                         for rev in self:
                             entry = index[rev]
                             # Some classes override linkrev to take filtered revs into
                             # account. Use raw entry from index.
                             flags = entry[0] & 0xffff
                             linkrev = entry[4]
                             p1 = index[entry[5]][7]
                             p2 = index[entry[6]][7]
                             node = entry[7]
                             # (Possibly) reuse the delta from the revlog if allowed and
                             # the revlog chunk is a delta.
                             cachedelta = None
                             text = None
                             if populatecachedelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, str(self._chunk(rev)))
                             if not cachedelta:
                                 text = self.revision(rev)
                             ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                                     checkambig=False)
                             dfh = None
                             if not destrevlog._inline:
                                 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                             try:
                                 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
                                                         flags, cachedelta, ifh, dfh)
                             finally:
                                 if dfh:
                                     dfh.close()
                                 ifh.close()
                             if addrevisioncb:
                                 addrevisioncb(self, rev, node)
                     finally:
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._aggressivemergedeltas = oldamd