changelog: make sure datafile is 00changelog.d (API)...
Jun Wu
r32307:3caec778 default
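Context for the change below: changelog may open a pending index named '00changelog.i.a' (the trypending branch in changelog.__init__), while revlog used to derive the data file name from the index file name. A minimal sketch of why that derivation goes wrong for the pending index:

# old derivation in revlog.__init__: self.datafile = indexfile[:-2] + ".d"
indexfile = '00changelog.i.a'        # pending index used with trypending
print(indexfile[:-2] + '.d')         # '00changelog.i.d' - not the real data file

Passing datafile='00changelog.d' explicitly keeps revision data reads on the canonical data file no matter which index file was opened.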
mercurial/changelog.py
@@ -1,541 +1,543 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)

from . import (
    encoding,
    error,
    revlog,
    util,
)

_defaultextra = {'branch': 'default'}

def _string_escape(text):
    """
    >>> d = {'nl': chr(10), 'bs': chr(92), 'cr': chr(13), 'nul': chr(0)}
    >>> s = "ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == util.unescapestr(res)
    True
    """
    # subset of the string_escape codec
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def decodeextra(text):
    """
    >>> sorted(decodeextra(encodeextra({'foo': 'bar', 'baz': chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({'foo': 'bar',
    ...                                 'baz': chr(92) + chr(0) + '2'})
    ...                    ).iteritems())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split('\0'):
        if l:
            if '\\0' in l:
                # fix up \0 without getting into trouble with \\0
                l = l.replace('\\\\', '\\\\\n')
                l = l.replace('\\0', '\0')
                l = l.replace('\n', '')
            k, v = util.unescapestr(l).split(':', 1)
            extra[k] = v
    return extra

def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
    return "\0".join(items)

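For reference, a self-contained sketch of the resulting wire format (the two functions are copied from above; the sample dict is made up):

def _string_escape(text):
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def encodeextra(d):
    items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
    return "\0".join(items)

print(repr(encodeextra({'branch': 'stable', 'close': '1'})))
# 'branch:stable\x00close:1' - sorted key:value pairs joined by NUL bytes
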
def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''
    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end
    def tell(self):
        return self.offset
    def flush(self):
        pass
    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

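A toy run of appender (Python 2 semantics, like the surrounding code; the _vfs shim and temp file are illustrative, not Mercurial API). It shows a read spanning the bytes already on disk and the buffered, not-yet-written data:

import os, tempfile

class _vfs(object):
    # minimal stand-in for Mercurial's vfs: a callable opener plus fstat()
    def __call__(self, name, mode='r'):
        return open(name, mode)
    def fstat(self, fp):
        return os.fstat(fp.fileno())

fd, path = tempfile.mkstemp()
os.write(fd, 'ondisk')                   # 6 bytes already committed to disk
os.close(fd)

buf = []
fp = appender(_vfs(), path, 'rb+', buf)  # appender as defined above
fp.seek(0, 2)                            # virtual end: disk size plus buffer
fp.write('pending')                      # lands in buf, not on disk
fp.seek(3)
print(fp.read(7))                        # 'iskpend' - crosses the boundary
fp.close()
os.remove(path)
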
def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""
    def _divert(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + ".a", mode)
    return _divert

def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""
    def _delay(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)
    return _delay

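How the diverting wrapper routes file names, with a dummy opener (names are illustrative); _delayopener intercepts only the target the same way, returning an appender instead:

def dummyopener(name, mode='r', checkambig=False):
    return (name, mode)

divert = _divertopener(dummyopener, '00changelog.i')
print(divert('00changelog.i', 'a'))   # ('00changelog.i.a', 'a') - redirected
print(divert('00changelog.d', 'a'))   # ('00changelog.d', 'a')   - untouched
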
_changelogrevision = collections.namedtuple(u'changelogrevision',
                                            (u'manifest', u'user', u'date',
                                             u'files', u'description',
                                             u'extra'))

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        u'_offsets',
        u'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(
                manifest=nullid,
                user='',
                date=(0, 0),
                files=[],
                description='',
                extra=_defaultextra,
            )

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1:nl3 + 2] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

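The text layout documented in __new__, built by hand (all values are made up); the same index arithmetic recovers the pieces:

text = ('0123456789abcdef0123456789abcdef01234567\n'   # manifest node, hex
        'Jane Doe <jane@example.com>\n'                 # user
        '1500000000 0 branch:stable\n'                  # time tz extra
        'a.txt\n'                                       # files...
        'b.txt\n'
        '\n'
        'commit message')
nl1 = text.index('\n')
nl2 = text.index('\n', nl1 + 1)
nl3 = text.index('\n', nl2 + 1)
doublenl = text.index('\n\n', nl3 + 1)
print(text[nl3 + 1:doublenl].split('\n'))   # ['a.txt', 'b.txt']
print(text[doublenl + 2:])                  # 'commit message'
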
    @property
    def manifest(self):
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2:])

class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

-        revlog.revlog.__init__(self, opener, indexfile, checkambig=True)
+        datafile = '00changelog.d'
+        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
+                               checkambig=True)

        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.REVLOGGENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self.storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tip(self):
        """filtered version of revlog.tip"""
        for i in xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return self.node(i)

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

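The pattern in these overrides (and the lookup methods below) is uniform: a revision listed in filteredrevs behaves as if absent. A toy version of the iteration side:

filteredrevs = frozenset([1, 3])
visible = [r for r in range(5) if r not in filteredrevs]
print(visible)   # [0, 2, 4] - revisions 1 and 3 are hidden
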
    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self.checkinlinesize(tr)

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, "w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write("".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

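A summary of the delayed-write lifecycle implemented by the three methods above (a reading of the code, not extra API):

# 1. delayupdate(tr): on an empty changelog, divert writes to
#    '00changelog.i.a'; otherwise buffer index writes in self._delaybuf.
# 2. _writepending(tr): copy the on-disk index plus the buffered bytes into
#    '00changelog.i.a' so pretxnchangegroup hooks can read the pending
#    state, then switch to divert mode.
# 3. _finalize(tr): rename '00changelog.i.a' over '00changelog.i' (divert
#    mode) or append the buffer to '00changelog.i' (delay mode).
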
    def checkinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog.checkinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

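A hedged usage sketch of the 6-tuple (repo and node are placeholders, not defined here):

manifest, user, date, files, desc, extra = repo.changelog.read(node)
print(files)             # e.g. ['a.txt', 'b.txt']
print(extra['branch'])   # 'default' unless the changeset names another branch
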
    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
        # overlay over the standard revlog._addrevision to track the new
        # revision on the transaction.
        rev = len(self)
        node = super(changelog, self)._addrevision(node, rawtext, transaction,
                                                   *args, **kwargs)
        revs = transaction.changes.get('revs')
        if revs is not None:
            revs.add(rev)
        return node
mercurial/revlog.py
@@ -1,2150 +1,2150 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    pycompat,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor

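A sketch of what a registration could look like, following the docstring's invariants (the processor functions here are hypothetical, and the call itself is left commented out since it mutates module state):

def _readext(self, rawtext):
    return rawtext, False   # text shown to the user; don't hash it

def _writeext(self, text):
    return text, False      # rawtext to store; hash the user-visible text

def _rawext(self, rawtext):
    return False            # same boolean as the 'write' transform

# addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))
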
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

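The 48-bit offset / 16-bit flags packing these helpers implement, round-tripped by hand:

q = (1234 << 16) | (1 << 15)   # offset_type(1234, REVIDX_ISCENSORED)
print(q >> 16)                 # 1234  (getoffset)
print(q & 0xFFFF)              # 32768 (gettype: the censored flag bit)
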
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

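A standalone restatement of the scheme (the fast path above is equivalent because nullid, being all zero bytes, always sorts first):

import hashlib

nullid = b'\x00' * 20

def nodehash(text, p1, p2):
    a, b = sorted([p1, p2])
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

print(len(nodehash(b'contents', nullid, nullid)))   # 20 - a sha1 digest
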
# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

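A quick sanity check of the two record layouts described above:

import struct
print(struct.calcsize(">Qiiiiii20s12x"))   # 64 - RevlogNG entry size
print(struct.calcsize(">4l20s20s20s"))     # 76 - old v0 entry size
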
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
-    def __init__(self, opener, indexfile, checkambig=False):
+    def __init__(self, opener, indexfile, datafile=None, checkambig=False):
256 """
256 """
257 create a revlog object
257 create a revlog object
258
258
259 opener is a function that abstracts the file opening operation
259 opener is a function that abstracts the file opening operation
260 and can be used to implement COW semantics or the like.
260 and can be used to implement COW semantics or the like.
261 """
261 """
262 self.indexfile = indexfile
262 self.indexfile = indexfile
-        self.datafile = indexfile[:-2] + ".d"
+        self.datafile = datafile or (indexfile[:-2] + ".d")
264 self.opener = opener
264 self.opener = opener
265 # When True, indexfile is opened with checkambig=True at writing, to
265 # When True, indexfile is opened with checkambig=True at writing, to
266 # avoid file stat ambiguity.
266 # avoid file stat ambiguity.
267 self._checkambig = checkambig
267 self._checkambig = checkambig
268 # 3-tuple of (node, rev, text) for a raw revision.
268 # 3-tuple of (node, rev, text) for a raw revision.
269 self._cache = None
269 self._cache = None
270 # Maps rev to chain base rev.
270 # Maps rev to chain base rev.
271 self._chainbasecache = util.lrucachedict(100)
271 self._chainbasecache = util.lrucachedict(100)
272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
272 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
273 self._chunkcache = (0, '')
273 self._chunkcache = (0, '')
274 # How much data to read and cache into the raw revlog data cache.
274 # How much data to read and cache into the raw revlog data cache.
275 self._chunkcachesize = 65536
275 self._chunkcachesize = 65536
276 self._maxchainlen = None
276 self._maxchainlen = None
277 self._aggressivemergedeltas = False
277 self._aggressivemergedeltas = False
278 self.index = []
278 self.index = []
279 # Mapping of partial identifiers to full nodes.
279 # Mapping of partial identifiers to full nodes.
280 self._pcache = {}
280 self._pcache = {}
281 # Mapping of revision integer to full node.
281 # Mapping of revision integer to full node.
282 self._nodecache = {nullid: nullrev}
282 self._nodecache = {nullid: nullrev}
283 self._nodepos = None
283 self._nodepos = None
284 self._compengine = 'zlib'
284 self._compengine = 'zlib'
285
285
286 v = REVLOG_DEFAULT_VERSION
286 v = REVLOG_DEFAULT_VERSION
287 opts = getattr(opener, 'options', None)
287 opts = getattr(opener, 'options', None)
288 if opts is not None:
288 if opts is not None:
289 if 'revlogv1' in opts:
289 if 'revlogv1' in opts:
290 if 'generaldelta' in opts:
290 if 'generaldelta' in opts:
291 v |= REVLOGGENERALDELTA
291 v |= REVLOGGENERALDELTA
292 else:
292 else:
293 v = 0
293 v = 0
294 if 'chunkcachesize' in opts:
294 if 'chunkcachesize' in opts:
295 self._chunkcachesize = opts['chunkcachesize']
295 self._chunkcachesize = opts['chunkcachesize']
296 if 'maxchainlen' in opts:
296 if 'maxchainlen' in opts:
297 self._maxchainlen = opts['maxchainlen']
297 self._maxchainlen = opts['maxchainlen']
298 if 'aggressivemergedeltas' in opts:
298 if 'aggressivemergedeltas' in opts:
299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
299 self._aggressivemergedeltas = opts['aggressivemergedeltas']
300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
300 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
301 if 'compengine' in opts:
301 if 'compengine' in opts:
302 self._compengine = opts['compengine']
302 self._compengine = opts['compengine']
303
303
304 if self._chunkcachesize <= 0:
304 if self._chunkcachesize <= 0:
305 raise RevlogError(_('revlog chunk cache size %r is not greater '
305 raise RevlogError(_('revlog chunk cache size %r is not greater '
306 'than 0') % self._chunkcachesize)
306 'than 0') % self._chunkcachesize)
307 elif self._chunkcachesize & (self._chunkcachesize - 1):
307 elif self._chunkcachesize & (self._chunkcachesize - 1):
308 raise RevlogError(_('revlog chunk cache size %r is not a power '
308 raise RevlogError(_('revlog chunk cache size %r is not a power '
309 'of 2') % self._chunkcachesize)
309 'of 2') % self._chunkcachesize)
310
310
311 indexdata = ''
311 indexdata = ''
312 self._initempty = True
312 self._initempty = True
313 try:
313 try:
314 f = self.opener(self.indexfile)
314 f = self.opener(self.indexfile)
315 indexdata = f.read()
315 indexdata = f.read()
316 f.close()
316 f.close()
317 if len(indexdata) > 0:
317 if len(indexdata) > 0:
318 v = struct.unpack(versionformat, indexdata[:4])[0]
318 v = struct.unpack(versionformat, indexdata[:4])[0]
319 self._initempty = False
319 self._initempty = False
320 except IOError as inst:
320 except IOError as inst:
321 if inst.errno != errno.ENOENT:
321 if inst.errno != errno.ENOENT:
322 raise
322 raise
323
323
324 self.version = v
324 self.version = v
325 self._inline = v & REVLOGNGINLINEDATA
325 self._inline = v & REVLOGNGINLINEDATA
326 self._generaldelta = v & REVLOGGENERALDELTA
326 self._generaldelta = v & REVLOGGENERALDELTA
327 flags = v & ~0xFFFF
327 flags = v & ~0xFFFF
328 fmt = v & 0xFFFF
328 fmt = v & 0xFFFF
329 if fmt == REVLOGV0 and flags:
329 if fmt == REVLOGV0 and flags:
330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
330 raise RevlogError(_("index %s unknown flags %#04x for format v0")
331 % (self.indexfile, flags >> 16))
331 % (self.indexfile, flags >> 16))
332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
332 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
333 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
334 % (self.indexfile, flags >> 16))
334 % (self.indexfile, flags >> 16))
335 elif fmt > REVLOGNG:
335 elif fmt > REVLOGNG:
336 raise RevlogError(_("index %s unknown format %d")
336 raise RevlogError(_("index %s unknown format %d")
337 % (self.indexfile, fmt))
337 % (self.indexfile, fmt))
338
338
339 self.storedeltachains = True
339 self.storedeltachains = True
340
340
341 self._io = revlogio()
341 self._io = revlogio()
342 if self.version == REVLOGV0:
342 if self.version == REVLOGV0:
343 self._io = revlogoldio()
343 self._io = revlogoldio()
344 try:
344 try:
345 d = self._io.parseindex(indexdata, self._inline)
345 d = self._io.parseindex(indexdata, self._inline)
346 except (ValueError, IndexError):
346 except (ValueError, IndexError):
347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
347 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
348 self.index, nodemap, self._chunkcache = d
348 self.index, nodemap, self._chunkcache = d
349 if nodemap is not None:
349 if nodemap is not None:
350 self.nodemap = self._nodecache = nodemap
350 self.nodemap = self._nodecache = nodemap
351 if not self._chunkcache:
351 if not self._chunkcache:
352 self._chunkclear()
352 self._chunkclear()
353 # revnum -> (chain-length, sum-delta-length)
353 # revnum -> (chain-length, sum-delta-length)
354 self._chaininfocache = {}
354 self._chaininfocache = {}
355 # revlog header -> revlog compressor
355 # revlog header -> revlog compressor
356 self._decompressors = {}
356 self._decompressors = {}
357
357
358 @util.propertycache
358 @util.propertycache
359 def _compressor(self):
359 def _compressor(self):
360 return util.compengines[self._compengine].revlogcompressor()
360 return util.compengines[self._compengine].revlogcompressor()
361
361
362 def tip(self):
362 def tip(self):
363 return self.node(len(self.index) - 2)
363 return self.node(len(self.index) - 2)
364 def __contains__(self, rev):
364 def __contains__(self, rev):
365 return 0 <= rev < len(self)
365 return 0 <= rev < len(self)
366 def __len__(self):
366 def __len__(self):
367 return len(self.index) - 1
367 return len(self.index) - 1
368 def __iter__(self):
368 def __iter__(self):
369 return iter(xrange(len(self)))
369 return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

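    # Illustrative sketch (not part of the original module): ``revs`` can
    # walk in either direction, assuming ``rl`` is an open revlog with at
    # least three revisions:
    #
    #   list(rl.revs(start=0, stop=2)) # -> [0, 1, 2] (stop is inclusive)
    #   list(rl.revs(start=2, stop=0)) # -> [2, 1, 0] (descends when
    #                                  #    start > stop)
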
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

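    # Illustrative sketch (not part of the original module): ``rev`` and
    # ``node`` are inverses of each other, assuming ``rl`` is a non-empty
    # revlog:
    #
    #   tipnode = rl.tip()
    #   r = rl.rev(tipnode)
    #   assert rl.node(r) == tipnode
    #   assert rl.hasnode(tipnode)
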
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

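    # Illustrative sketch (not part of the original module): the packing of
    # the first index entry means an (offset, flags) pair hypothetically
    # recombines and splits as follows:
    #
    #   entry0 = (offset << 16) | flags   # how the 8-byte field is packed
    #   assert (entry0 >> 16) == offset   # what start() extracts
    #   assert (entry0 & 0xFFFF) == flags # what flags() extracts
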
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

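    # Illustrative sketch (not part of the original module): the shape of a
    # delta chain, assuming ``rl`` is an open revlog and ``r`` a valid
    # revision:
    #
    #   chain, stopped = rl._deltachain(r)
    #   assert not stopped            # no stoprev given, so we hit the base
    #   assert chain == sorted(chain) # ascending order, base first
    #   assert chain[-1] == r         # the requested rev closes the chain
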
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

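    # Illustrative sketch (not part of the original module): in revset terms
    # ``findmissing(common, heads)`` computes (::heads) - (::common);
    # assuming ``rl`` is an open revlog and ``c``, ``h`` are hypothetical
    # node IDs:
    #
    #   missing = rl.findmissing(common=[c], heads=[h])
    #   # every returned node is an ancestor of h but not of c, and the
    #   # list is sorted by ascending revision number
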
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

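    # Illustrative sketch (not part of the original module): ``nodesbetween``
    # bounds a DAG slice from both ends. Assuming ``rl`` is an open revlog
    # and node ``a`` is an ancestor of node ``b``:
    #
    #   nodes, outroots, outheads = rl.nodesbetween([a], [b])
    #   assert nodes[0] == a and nodes[-1] == b # both endpoints included
    #   assert outroots == [a] and outheads == [b]
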
    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

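    # Illustrative sketch (not part of the original module): with no
    # arguments ``heads`` returns every childless node; with ``start`` it is
    # restricted to descendants of start. Assuming ``rl`` is a non-empty
    # revlog:
    #
    #   allheads = rl.heads()
    #   assert rl.tip() in allheads # the tip never has children
    #   assert rl.heads(start=rl.tip()) == [rl.tip()]
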
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

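    # Illustrative sketch (not part of the original module): node ``a`` is an
    # ancestor of node ``b`` exactly when ``a`` itself is one of the heads of
    # their common ancestors. Assuming ``rl`` is an open revlog:
    #
    #   assert rl.isancestor(a, b) == (a in rl.commonancestorsheads(a, b))
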
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

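    # Illustrative sketch (not part of the original module): ``lookup``
    # accepts several identifier forms for the same node. Assuming ``rl`` is
    # a non-empty revlog:
    #
    #   n = rl.tip()
    #   assert rl.lookup(rl.rev(n)) == n      # integer revision number
    #   assert rl.lookup(str(rl.rev(n))) == n # str(revision number)
    #   assert rl.lookup(hex(n)) == n         # full 40-char hex nodeid
    #   assert rl.lookup(hex(n)[:12]) == n    # hex prefix, if unambiguous
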
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

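    # Illustrative sketch (not part of the original module): the window
    # arithmetic above rounds reads to cachesize-aligned boundaries. For
    # example, with a hypothetical cachesize = 65536, offset = 70000 and
    # length = 100:
    #
    #   realoffset = 70000 & ~(65536 - 1)                     # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536
    #                                                         # -> 65536
    #   # bytes [65536, 131072) are read and cached, and the caller's
    #   # [70000, 70100) slice is returned as a buffer into that window
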
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._getsegmentforrevs(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

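    # Illustrative sketch (not part of the original module): the delta parent
    # depends on the storage layout. Assuming ``rl`` is an open revlog and
    # ``r`` is a valid revision:
    #
    #   p = rl.deltaparent(r)
    #   # full snapshot (base == rev): p is nullrev
    #   # generaldelta layout: p is the base recorded in the index, which
    #   #   may be any earlier revision
    #   # classic layout: p is always r - 1
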
1250 def revdiff(self, rev1, rev2):
1250 def revdiff(self, rev1, rev2):
1251 """return or calculate a delta between two revisions
1251 """return or calculate a delta between two revisions
1252
1252
1253 The delta calculated is in binary form and is intended to be written to
1253 The delta calculated is in binary form and is intended to be written to
1254 revlog data directly. So this function needs raw revision data.
1254 revlog data directly. So this function needs raw revision data.
1255 """
1255 """
1256 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1256 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1257 return bytes(self._chunk(rev2))
1257 return bytes(self._chunk(rev2))
1258
1258
1259 return mdiff.textdiff(self.revision(rev1, raw=True),
1259 return mdiff.textdiff(self.revision(rev1, raw=True),
1260 self.revision(rev2, raw=True))
1260 self.revision(rev2, raw=True))
1261
1261
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

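    # Minimal reconstruction sketch (assumes a toy 'chunks' list; this
    # mirrors the logic above rather than adding to it): chunks[0] is the
    # chain's full snapshot and the remaining chunks are deltas applied in
    # order.
    #
    #   rawtext = bytes(chunks[0])
    #   rawtext = mdiff.patches(rawtext, chunks[1:])
    #
    # The (node, rev, rawtext) cache then lets an immediate re-read of the
    # same node skip the chain walk entirely.
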
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

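    # Hedged sketch of a flag processor (the flag name and helpers below are
    # hypothetical, not part of this file): processors are registered as a
    # 3-tuple of (readtransform, writetransform, rawtransform) keyed by the
    # flag bit, matching the unpacking above.
    #
    #   def _readzip(rl, text):
    #       return zlib.decompress(text), True    # (text, validatehash)
    #   def _writezip(rl, text):
    #       return zlib.compress(text), False
    #   def _rawzip(rl, text):
    #       return False                          # stored form != hashed form
    #   _flagprocessors[REVIDX_EXAMPLE_ZIPPED] = (_readzip, _writezip,
    #                                             _rawzip)
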
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

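    # Layout note with a tiny sketch (the file naming follows the
    # indexfile/datafile attributes used above; treat this as an
    # illustration, not a spec): an inline revlog keeps each data chunk
    # right after its index entry in the '.i' file; once the data outgrows
    # _maxinline, the chunks are copied out to the '.d' file and the index
    # is rewritten without the inline flag.
    #
    #   before: foo.i = [entry0][data0][entry1][data1]...
    #   after:  foo.i = [entry0][entry1]...   foo.d = [data0][data1]...
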
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta)

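    # Hedged usage sketch ('repo', 'tr', 'linkrev', and the parent nodes are
    # assumptions for illustration): adding one revision inside an open
    # transaction and getting back its nodeid.
    #
    #   with repo.transaction('example') as tr:
    #       node = rlog.addrevision("contents\n", tr, linkrev,
    #                               p1node, p2node)
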
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

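    # Header dispatch in miniature, assuming the engine declines to compress
    # (returns a falsy value) because compression would not shrink the
    # chunk; 'x' is the zlib header byte, 'u' marks stored-uncompressed, and
    # '\0' marks raw data whose first byte happens to be NUL:
    #
    #   compress('')         -> ('', '')          # empty fast path
    #   compress(b'\0abc')   -> ('', b'\0abc')    # raw, self-marking
    #   compress(b'abc')     -> ('u', b'abc')     # uncompressible, tagged
    #   decompress(b'uabc')  -> buffer view of b'abc'
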
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
            (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

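    # Worked example of the bounds above (numbers invented): for a fulltext
    # of textlen = 1000 bytes, a candidate delta is rejected when the read
    # span from its chain base exceeds 4000 bytes (dist > textlen * 4),
    # when the delta itself is larger than the fulltext (l > textlen), when
    # the chain's summed compressed deltas exceed 2000 bytes
    # (compresseddeltalen > textlen * 2), or when the chain would grow past
    # _maxchainlen.
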
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw when
            # they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice
                # build delta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude already lazy tested base if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try against prev to hopefully save
                # us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

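    # Delta-selection order in the method above, summarized (no new
    # behavior): 1) if a cached delta arrived with the data and
    # lazydeltabase applies, test it first; 2) under generaldelta, test the
    # parent(s) -- both parents only when aggressivemergedeltas is set,
    # otherwise just the closer one; 3) fall back to the previous revision;
    # 4) if nothing passes _isgooddelta, store a full compressed snapshot.
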
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

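    # Hedged caller sketch (the changegroup object 'cg', 'cl', and 'tr' are
    # assumptions for illustration): linkmapper translates each chunk's
    # changeset node into the local changelog revision number.
    #
    #   def linkmapper(csnode):
    #       return cl.rev(csnode)   # 'cl' would be the local changelog
    #   added = rlog.addgroup(cg, linkmapper, tr)
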
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

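    # Worked example (invented; single head, linkrevs non-decreasing as the
    # changegroup ordering makes them): with revs 0..4 carrying linkrevs
    # [0, 2, 2, 3, 4] and minlink = 3, the walk pops rev 4 (linkrev 4) and
    # rev 3 (linkrev 3), then stops once no pending linkrev >= minlink
    # remains, returning strippoint = 3. brokenrevs collects revs above the
    # strippoint whose linkrevs are still < minlink, which can happen when
    # several branches interleave.
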
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

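    # Truncation offset arithmetic, spelled out (mirrors the branch above):
    # for a split revlog the data file is cut at start(rev) and the index
    # at rev * entrysize; for an inline revlog both live in one file, so
    # the cut point is start(rev) + rev * entrysize, since every revision
    # also carries an index entry before its data chunk.
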
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = {'always', 'samerevs', 'never'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        When unspecified, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd
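
    # Hedged usage sketch ('srclog', 'dstlog', and 'tr' are assumptions for
    # illustration): recompute every delta while copying, e.g. after a
    # delta algorithm change.
    #
    #   srclog.clone(tr, dstlog, deltareuse=srclog.DELTAREUSENEVER)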