revlog: avoid applying delta chain on cache hit...
Jun Wu - r31804:726f2412 default
@@ -1,2125 +1,2129 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import errno
import hashlib
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullid,
    nullrev,
)
from .i18n import _
from . import (
    ancestor,
    error,
    mdiff,
    parsers,
    pycompat,
    templatefilters,
    util,
)

_pack = struct.pack
_unpack = struct.unpack
# Aliased for performance.
_zlibdecompress = zlib.decompress

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGGENERALDELTA = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# revlog index flags
REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors applied
REVIDX_FLAGS_ORDER = [
    REVIDX_ISCENSORED,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
]
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read) f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw) f(self, rawtext) -> bool
    "text" is presented to the user. "rawtext" is stored in revlog data, not
    directly visible to the user.
    The boolean returned by these transforms is used to determine whether
    the returned text can be used for hash integrity checking. For example,
    if "write" returns False, then "text" is used to generate the hash. If
    "write" returns True, that basically means "rawtext" returned by "write"
    should be used to generate the hash. Usually, "write" and "read" return
    different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise ProgrammingError(msg)
    if flag in _flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    _flagprocessors[flag] = processor
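
# Illustration (hypothetical, nothing is registered here): an extension
# storing revision data externally could register a processor for
# REVIDX_EXTSTORED along these lines, with readexternal/writeexternal
# standing in for helpers it would define itself:
#
#   def _readext(self, rawtext):
#       return readexternal(rawtext), False  # "text" differs from "rawtext"
#   def _writeext(self, text):
#       return writeexternal(text), False    # hash over "text", per above
#   def _rawext(self, rawtext):
#       return False                         # rawtext alone can't verify hash
#   addflagprocessor(REVIDX_EXTSTORED, (_readext, _writeext, _rawext))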

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
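
# For example, given the flag definitions above:
#
#   offset_type(4096, REVIDX_ISCENSORED) == (4096 << 16) | 0x8000
#   getoffset(offset_type(4096, REVIDX_ISCENSORED)) == 4096
#   gettype(offset_type(4096, REVIDX_ISCENSORED)) == REVIDX_ISCENSORED
#
# i.e. the first index tuple entry packs a 48-bit data offset above 16 bits
# of per-revision flags.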

_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = _nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = hashlib.sha1(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
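
# Equivalently, for any pair of parents the digest above is:
#
#   hashlib.sha1(min(p1, p2) + max(p1, p2) + text).digest()
#
# The p2 == nullid branch is only a fast path: nullid is twenty zero bytes
# and therefore always sorts first, so _nullhash caches sha1(nullid).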

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"
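
# struct.calcsize(indexformatv0) == 76: four 4-byte big-endian ints plus
# three 20-byte nodeids, matching the field list above.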

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"
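
# struct.calcsize(indexformatng) == 64: an 8-byte offset/flags word, six
# 4-byte ints, a 20-byte nodeid and 12 pad bytes (the "32 bytes: nodeid"
# above leaves room for longer hashes).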

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.
    """
    def __init__(self, opener, indexfile, checkambig=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'

        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= REVLOGGENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            f = self.opener(self.indexfile)
            indexdata = f.read()
            f.close()
            if len(indexdata) > 0:
                v = struct.unpack(versionformat, indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._generaldelta = v & REVLOGGENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
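
    # Illustration (hypothetical values): an opener carrying
    #   options = {'revlogv1': True, 'generaldelta': True,
    #              'chunkcachesize': 65536, 'compengine': 'zlib'}
    # requests a RevlogNG revlog with general delta; options lacking
    # 'revlogv1' force version 0, and no options at all leave
    # REVLOG_DEFAULT_VERSION. Either way, the header of an existing index
    # file read above overrides the requested version.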

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)
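
    # Note that stop is inclusive when given and iteration runs backwards
    # when start > stop: revs(start=5, stop=2) yields 5, 4, 3, 2, while
    # plain revs() yields 0 through len(self) - 1.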

    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)
    size = rawsize

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base
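
    # For reference, the layout of an index tuple e = self.index[rev], as
    # read by the accessors above and the chain walks below:
    #   e[0]: offset (upper 48 bits) and flags (lower 16 bits)
    #   e[1]: compressed length    e[2]: uncompressed length
    #   e[3]: delta base rev       e[4]: linkrev
    #   e[5], e[6]: parent revs    e[7]: nodeid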

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        return self.index[rev][5:7]

    def node(self, rev):
        return self.index[rev][7]

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
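
    # Sketch of the two layouts this walk handles: with general delta the
    # base pointer e[3] may reference any earlier rev, so a chain can look
    # like 7 -> 4 -> 2; without it deltas are always against rev - 1, so
    # the chain is the contiguous run from the base up to rev.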

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
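
    # For example, with parent edges 0 <- 1 <- 2 and 0 <- 3,
    # descendants([0]) yields 1, 2, 3 in revision order, and
    # descendants([1]) yields only 2: a rev never counts as its own
    # descendant here.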

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self):
        try:
            return self.index.headrevs()
        except AttributeError:
            return self._headrevs()

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered revs so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
921
921
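    # Editor's illustration (not part of the original module): for a small
    # DAG 0 <- 1 <- 2 and 1 <- 3, heads(start=node(0)) walks the revs after
    # rev 0, evicting a parent from the head set as soon as a reachable
    # child of it appears:
    #
    #   init: reachable = {0}, heads = {0}
    #   r=1: p=0 reachable -> reachable={0,1}, heads={0,1}; 0 evicted -> {1}
    #   r=2: p=1 reachable -> reachable={0,1,2}, heads={1,2}; 1 evicted -> {2}
    #   r=3: p=1 reachable -> reachable={0,1,2,3}, heads={2,3}
    #
    # leaving revs 2 and 3, the two childless descendants of rev 0.
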
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                return partial
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

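    # Editor's illustration (not part of the original module): the cache is
    # a single (offset, data) window, so only a read that begins exactly
    # where the cached data ends can extend it (and only while the combined
    # size stays under _chunksize):
    #
    #   self._chunkcache = (4096, d)   # len(d) == 1024
    #   _addchunk(5120, d2)            # 4096 + 1024 == 5120 -> appended
    #   _addchunk(9000, d3)            # discontiguous -> cache replaced
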
    def _loadchunk(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        if df is not None:
            closehandle = False
        else:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)
            closehandle = True

        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        df.seek(realoffset)
        d = df.read(reallength)
        if closehandle:
            df.close()
        self._addchunk(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

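    # Editor's illustration (not part of the original module): assuming a
    # 64KB chunk cache, a request for 100 bytes at offset 70000 is widened
    # to an aligned window before touching the OS:
    #
    #   cachesize  = 65536
    #   realoffset = 70000 & ~65535                            # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # -> 65536
    #
    # so one aligned 64KB read at offset 65536 serves the request and
    # caches the surrounding bytes for neighboring (often backwards) reads.
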
    def _getchunk(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._loadchunk(offset, length, df=df)

    def _chunkraw(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getchunk(start, length, df=df)

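    # Editor's illustration (not part of the original module): in an inline
    # revlog the data chunks are interleaved with the fixed-size index
    # entries, so rev r's chunk is preceded by r + 1 index entries (entries
    # 0 through r) in addition to all earlier data. Assuming the RevlogNG
    # entry size of 64 bytes:
    #
    #   logical start of rev 2 = 300                 # self.start(2)
    #   physical start         = 300 + (2 + 1) * 64  # -> 492
    #
    # which is the ``start += (startrev + 1) * self._io.size`` above.
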
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._chunkraw(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        try:
            offset, data = self._chunkraw(revs[0], revs[-1], df=df)
        except OverflowError:
            # issue4215 - we can't cache a run of chunks greater than
            # 2G on Windows
            return [self._chunk(rev, df=df) for rev in revs]

        decomp = self.decompress
        for rev in revs:
            chunkstart = start(rev)
            if inline:
                chunkstart += (rev + 1) * iosize
            chunklength = length(rev)
            ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

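    # Editor's illustration (not part of the original module): for an
    # ascending run such as revs [5, 6, 7], a single _chunkraw(5, 7) call
    # fetches the covering byte span once, and each revision's chunk is
    # then sliced out of that buffer at chunkstart - offset, saving one
    # read (and one cache probe) per revision over calling _chunk() in a
    # loop.
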
    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

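    # Editor's illustration (not part of the original module): with
    # generaldelta, index[rev][3] names the delta base directly, so a
    # revision can delta against any earlier revision (usually a parent);
    # without generaldelta the field only records the start of the chain
    # and the actual base is implicitly rev - 1. A base equal to rev itself
    # marks a full snapshot, hence the nullrev return above.
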
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

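    # Editor's note (not part of the original module): the cache handling
    # above gives revision() three increasingly cheap outcomes on a cache
    # hit, which is the point of this changeset:
    #
    #   1. raw=True            -> return the cached rawtext immediately
    #   2. flags == REVIDX_DEFAULT_FLAGS
    #                          -> no flag processor can alter the text, so
    #                             the cached rawtext is the text; return it
    #   3. otherwise           -> reuse the cached rawtext but still run
    #                             _processflags()
    #
    # Only a genuine cache miss walks a delta chain and applies it via
    # _chunks() and mdiff.patches().
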
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update the result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

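    # Editor's sketch (not part of the original module): a processor is a
    # (readtransform, writetransform, rawtransform) triple keyed by its flag
    # bit in _flagprocessors; in this era of the code a module-level
    # addflagprocessor() helper performs the registration. A minimal
    # processor that stores text base64-encoded might look like:
    #
    #   def b64read(rl, text):
    #       return text.decode('base64'), True    # (text, validatehash)
    #   def b64write(rl, text):
    #       return text.encode('base64'), False
    #   def b64raw(rl, text):
    #       return False                          # rawtext hash not valid
    #   addflagprocessor(REVIDX_EXTSTORED, (b64read, b64write, b64raw))
    #
    # REVIDX_EXTSTORED is reused here purely for illustration; a real
    # caller registers its own flag bit listed in REVIDX_FLAGS_ORDER.
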
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, revornode))

    def checkinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover
            # data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r)[1])
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True,
                         checkambig=self._checkambig)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call close, the temp file will never replace the
        # real index
        fp.close()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any
        # provided cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

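    # Editor's illustration (not part of the original module): the first
    # byte routes the chunk, mirroring what compress() emits:
    #
    #   decompress('x\x9c...')   # zlib-compressed payload, inflated inline
    #   decompress('\0<data>')   # stored verbatim, returned as-is
    #   decompress('u<data>')    # explicitly uncompressed, header stripped
    #   decompress(<other>)      # any other header byte (e.g. one claimed
    #                            # by an engine such as zstd) is resolved
    #                            # through util.compengines and memoized in
    #                            # self._decompressors
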
    def _isgooddelta(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is
        within the disk span, disk size, and chain length bounds that we know
        to be performant."""
        if d is None:
            return False

        # - 'dist' is the distance from the base revision -- bounding it limits
        #   the amount of I/O we need to do.
        # - 'compresseddeltalen' is the sum of the total size of deltas we need
        #   to apply -- bounding it limits the amount of CPU we consume.
        dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
        if (dist > textlen * 4 or l > textlen or
            compresseddeltalen > textlen * 2 or
                (self._maxchainlen and chainlen > self._maxchainlen)):
            return False

        return True

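    # Editor's illustration (not part of the original module): for a 10,000
    # byte fulltext, a candidate delta is accepted only if
    #
    #   dist               <= 40,000  (textlen * 4: disk span to read)
    #   l                  <= 10,000  (the delta is no larger than the
    #                                  fulltext it encodes)
    #   compresseddeltalen <= 20,000  (textlen * 2: total chain to apply)
    #   chainlen           <= self._maxchainlen, when one is configured
    #
    # Failing any bound makes the caller fall back to another base or to
    # storing a full snapshot.
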
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be
          set. if both are set, they must correspond to each other.
        """
        btext = [rawtext]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            baserev = cachedelta[0]
            delta = cachedelta[1]
            # special case deltas which replace entire base; no need to decode
            # base revision. this neatly avoids censored bases, which throw
            # when they're decoded.
            hlen = struct.calcsize(">lll")
            if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                       len(delta) - hlen):
                btext[0] = delta[hlen:]
            else:
                if self._inline:
                    fh = ifh
                else:
                    fh = dfh
                basetext = self.revision(baserev, _df=fh, raw=True)
                btext[0] = mdiff.patch(basetext, delta)

            try:
                res = self._processflags(btext[0], flags, 'read', raw=True)
                btext[0], validatehash = res
                if validatehash:
                    self.checkhash(btext[0], node, p1=p1, p2=p2)
                if flags & REVIDX_ISCENSORED:
                    raise RevlogError(_('node %s is not censored') % node)
            except CensoredNodeError:
                # must pass the censored index flag to add censored revisions
                if not flags & REVIDX_ISCENSORED:
                    raise
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                if self.iscensored(rev):
                    # deltas based on a censored revision must replace the
                    # full content in one patch, so delta works everywhere
                    header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                    delta = header + t
                else:
                    if self._inline:
                        fh = ifh
                    else:
                        fh = dfh
                    ptext = self.revision(rev, _df=fh, raw=True)
                    delta = mdiff.textdiff(ptext, t)
            header, data = self.compress(delta)
            deltalen = len(header) + len(data)
            chainbase = self.chainbase(rev)
            dist = deltalen + offset - self.start(chainbase)
            if self._generaldelta:
                base = rev
            else:
                base = chainbase
            chainlen, compresseddeltalen = self._chaininfo(rev)
            chainlen += 1
            compresseddeltalen += deltalen
            return (dist, deltalen, (header, data), base,
                    chainbase, chainlen, compresseddeltalen)

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        delta = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        # should we try to build a delta?
        if prev != nullrev and self.storedeltachains:
            tested = set()
            # This condition is true most of the time when processing
            # changegroup data into a generaldelta repo. The only time it
            # isn't true is if this is the first revision in a delta chain
            # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
            if cachedelta and self._generaldelta and self._lazydeltabase:
                # Assume what we received from the server is a good choice;
                # builddelta will reuse the cache
                candidatedelta = builddelta(cachedelta[0])
                tested.add(cachedelta[0])
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
            if delta is None and self._generaldelta:
                # exclude the already lazily tested base, if any
                parents = [p for p in (p1r, p2r)
                           if p != nullrev and p not in tested]
                if parents and not self._aggressivemergedeltas:
                    # Pick whichever parent is closer to us (to minimize the
                    # chance of having to build a fulltext).
                    parents = [max(parents)]
                tested.update(parents)
                pdeltas = []
                for p in parents:
                    pd = builddelta(p)
                    if self._isgooddelta(pd, textlen):
                        pdeltas.append(pd)
                if pdeltas:
                    delta = min(pdeltas, key=lambda x: x[1])
            if delta is None and prev not in tested:
                # other approaches failed; try a delta against prev to
                # hopefully save us a fulltext.
                candidatedelta = builddelta(prev)
                if self._isgooddelta(candidatedelta, textlen):
                    delta = candidatedelta
        if delta is not None:
            dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
        else:
            rawtext = buildtext()
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = buildtext()

        if type(rawtext) == str:  # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

1745 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1749 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1746 # Files opened in a+ mode have inconsistent behavior on various
1750 # Files opened in a+ mode have inconsistent behavior on various
1747 # platforms. Windows requires that a file positioning call be made
1751 # platforms. Windows requires that a file positioning call be made
1748 # when the file handle transitions between reads and writes. See
1752 # when the file handle transitions between reads and writes. See
1749 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1753 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1750 # platforms, Python or the platform itself can be buggy. Some versions
1754 # platforms, Python or the platform itself can be buggy. Some versions
1751 # of Solaris have been observed to not append at the end of the file
1755 # of Solaris have been observed to not append at the end of the file
1752 # if the file was seeked to before the end. See issue4943 for more.
1756 # if the file was seeked to before the end. See issue4943 for more.
1753 #
1757 #
1754 # We work around this issue by inserting a seek() before writing.
1758 # We work around this issue by inserting a seek() before writing.
1755 # Note: This is likely not necessary on Python 3.
1759 # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
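        # With a separate data file the index entry goes to .i and the
        # chunk to .d; inline revlogs interleave entries and chunks in .i,
        # and checkinlinesize() below may split them out once the file
        # grows large enough.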
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

    def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the rest
        are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        # track the base of the current delta log
        content = []
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            # loop through our set of deltas
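            # iter(callable, sentinel) keeps calling cg.deltachunk(chain)
            # until it returns the sentinel {}; an empty chunk marks the
            # end of the group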
            chain = None
            for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                deltabase = chunkdata['deltabase']
                delta = chunkdata['delta']
                flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

                content.append(node)

                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

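                # Background (illustrative, not the exact mdiff internals):
                # a patch hunk starts with three big-endian int32s giving
                # (start, end, newlength), so a full replacement of the base
                # text is a single hunk shaped roughly like
                #   struct.pack(">lll", 0, oldlen, newlen) + newtext
                # which is the header mdiff.replacediffheader() encodes.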
                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, the delta must be a full
                    # replacement in a single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, flags, (baserev, delta),
                                          ifh, dfh,
                                          alwayscache=bool(addrevisioncb))

                if addrevisioncb:
                    addrevisioncb(self, chain)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self.opener(self.datafile, "a+")
                    ifh = self.opener(self.indexfile, "a+",
                                      checkambig=self._checkambig)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return content

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev
        ``minlink``

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
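        # Worked example (illustrative): suppose rev 1 has linkrev 3 but
        # precedes rev 2, which has linkrev 1, in the revlog, and minlink
        # is 2. We must truncate at rev 1, which also removes rev 2 even
        # though rev 2's own linkrev is below minlink; rev 2 is therefore
        # reported in brokenrevs so the caller can save and re-add it.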
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size
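        # For example, assuming the 64-byte RevlogNG index entry size and
        # rev == 10:
        #   separate data file: truncate .d at start(10) and .i at 10 * 64
        #   inline revlog:      truncate .i at start(10) + 10 * 64, since
        #                       index entries and data chunks are interleaved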

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
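        """check whether the index and data file sizes match the index contents

        Returns a (dd, di) pair giving the byte-count discrepancy of the
        data file and the index file respectively; (0, 0) means both files
        are consistent.
        """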
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEALL = set(['always', 'samerevs', 'never'])

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is
           the fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        the two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means if
        you are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If not set, the destination revlog's current setting is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
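                # an index entry is the tuple
                #   (offset_type, compressed length, rawtext length,
                #    delta base rev, linkrev, p1 rev, p2 rev, node)
                # hence the positional accesses below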
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1, p2,
                                            flags, cachedelta, ifh, dfh)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd