upstream/mercurial-mirror Commit - r31753:5d11b5ed

1

# revlog.py - storage back-end for mercurial

1

# revlog.py - storage back-end for mercurial

2

#

2

#

3

4

#

4

#

5

# This software may be used and distributed according to the terms of the

5

# This software may be used and distributed according to the terms of the

6

# GNU General Public License version 2 or any later version.

6

# GNU General Public License version 2 or any later version.

7

8

"""Storage back-end for Mercurial.

8

"""Storage back-end for Mercurial.

9

10

This provides efficient delta storage with O(1) retrieve and append

10

This provides efficient delta storage with O(1) retrieve and append

11

and O(changes) merge between branches.

11

and O(changes) merge between branches.

12

"""

12

"""

13

14

from __future__ import absolute_import

14

from __future__ import absolute_import

15

16

import collections

16

import collections

17

import errno

17

import errno

18

import hashlib

18

import hashlib

19

import os

19

import os

20

import struct

20

import struct

21

import zlib

21

import zlib

22

23

# import stuff from node for others to import from revlog

23

# import stuff from node for others to import from revlog

24

from .node import (

24

from .node import (

25

bin,

25

bin,

26

hex,

26

hex,

27

nullid,

27

nullid,

28

nullrev,

28

nullrev,

29

)

29

)

30

from .i18n import _

30

from .i18n import _

31

from . import (

31

from . import (

32

ancestor,

32

ancestor,

33

error,

33

error,

34

mdiff,

34

mdiff,

35

parsers,

35

parsers,

36

pycompat,

36

pycompat,

37

templatefilters,

37

templatefilters,

38

util,

38

util,

39

)

39

)

40

41

# Module-level aliases for frequently used struct/zlib callables
# (aliased for performance).
_pack, _unpack = struct.pack, struct.unpack
_zlibdecompress = zlib.decompress

45

46

# revlog header word: the low 16 bits carry the format number and the
# remaining high bits carry feature flags.

# format numbers
REVLOGV0 = 0
REVLOGNG = 1

# feature flags
REVLOGNGINLINEDATA = 1 << 16
REVLOGGENERALDELTA = 1 << 17
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA

# defaults applied when no existing index dictates the version
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

55

56

# revlog index flags (per-revision; kept in the low 16 bits of the first
# field of an index entry)
REVIDX_ISCENSORED = 1 << 15  # revision has censor metadata, must be verified
REVIDX_ELLIPSIS = 1 << 14  # revision hash does not match data (narrowhg)
REVIDX_EXTSTORED = 1 << 13  # revision data is stored externally
REVIDX_DEFAULT_FLAGS = 0
# stable order in which flags need to be processed and their processors
# applied
REVIDX_FLAGS_ORDER = [REVIDX_ISCENSORED, REVIDX_ELLIPSIS, REVIDX_EXTSTORED]
# union of every flag listed above
REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)

68

69

# max size of revlog with inline data (128 KiB)
_maxinline = 128 * 1024
# 1 MiB
_chunksize = 1024 * 1024

72

73

# Local names for the error classes used throughout this module.
RevlogError = error.RevlogError
LookupError = error.LookupError
CensoredNodeError = error.CensoredNodeError
ProgrammingError = error.ProgrammingError

# Registry of flag processors, keyed by flag (cf. 'addflagprocessor()' to
# register). REVIDX_ISCENSORED starts out registered with no processor.
_flagprocessors = {REVIDX_ISCENSORED: None}

82

83

def addflagprocessor(flag, processor):
    """Register ``processor`` as the flag processor for ``flag``.

    Invariant:
    - The flag must be part of both REVIDX_KNOWN_FLAGS and
      REVIDX_FLAGS_ORDER.
    - At most one processor may be registered per flag.
    - A processor is a 3-tuple of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is what the user sees. "rawtext" is what is stored in revlog
      data and is not directly visible to the user.
      The boolean returned by each transform tells whether the returned
      text can be used for hash integrity checking. For example, when
      "write" returns False, "text" is used to generate the hash; when it
      returns True, the "rawtext" it returned should be used instead.
      Usually "write" and "read" return different booleans, and "raw"
      returns the same boolean as "write".

    Note: the 'raw' transform is used for changegroup generation and in
    some debug commands. There it only indicates whether the contents can
    be used for hash integrity checks.

    Raises ProgrammingError for a flag that is unknown or missing from
    REVIDX_FLAGS_ORDER, and error.Abort when the flag already has an entry
    in the registry.
    """
    if (flag & REVIDX_KNOWN_FLAGS) == 0:
        raise ProgrammingError(
            _("cannot register processor on unknown flag '%#x'.") % flag)
    if flag not in REVIDX_FLAGS_ORDER:
        raise ProgrammingError(
            _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % flag)
    if flag in _flagprocessors:
        raise error.Abort(
            _("cannot register multiple processors on flag '%#x'.") % flag)
    _flagprocessors[flag] = processor

117

118

def getoffset(q):
    """Return the offset held in the bits above the low 16 of ``q``."""
    offset = q >> 16
    return int(offset)

120

121

def gettype(q):
    """Return the flag bits held in the low 16 bits of ``q``."""
    flagbits = q & 0xFFFF
    return int(flagbits)

123

124

def offset_type(offset, type):
    """Pack ``offset`` and the flag bits ``type`` into a single integer.

    The offset is shifted above the low 16 bits and the flags fill those
    low bits. Raises ValueError when ``type`` has a bit set that is not in
    REVIDX_KNOWN_FLAGS.
    """
    unknown = type & ~REVIDX_KNOWN_FLAGS
    if unknown != 0:
        raise ValueError('unknown revlog index flags')
    packed = (int(offset) << 16) | type
    return int(packed)

128

129

# SHA-1 state already fed with nullid; copied by hash() instead of
# re-hashing nullid each time.
_nullhash = hashlib.sha1(nullid)

def hash(text, p1, p2):
    """Return the SHA-1 digest of ``text`` combined with its parent hashes.

    The digest covers both the content and its position in history, so
    nodes with identical content but different parents are easy to tell
    apart in the revision graph.
    """
    if p2 != nullid:
        # neither parent is nullid: feed them in sorted order
        first, second = sorted((p1, p2))
        state = hashlib.sha1(first)
        state.update(second)
    else:
        # As of now, if one of the parent nodes is null, it is p2.
        # Copying the prepared hash is faster than creating a new one.
        state = _nullhash.copy()
        state.update(p1)
    state.update(text)
    return state.digest()

151

152

# index v0 record layout (big-endian):
#   4 bytes: offset
#   4 bytes: compressed length
#   4 bytes: base rev
#   4 bytes: link rev
#  20 bytes: parent 1 nodeid
#  20 bytes: parent 2 nodeid
#  20 bytes: nodeid
indexformatv0 = ">4l20s20s20s"

161

162

class revlogoldio(object):
    """Reader/writer for index records in the v0 format (indexformatv0)."""

    def __init__(self):
        # size in bytes of one packed v0 index record
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, data, inline):
        """Parse raw v0 index ``data`` into revlogv1-style entries.

        Returns ``(index, nodemap, None)``. ``index`` is a list of 8-tuples
        ending with an entry for the null revision, and ``nodemap`` maps
        nodeid to revision number. ``inline`` is not used here.
        """
        recsize = self.size
        total = len(data)
        entries = []
        nodemap = {nullid: nullrev}
        revnum = 0
        pos = 0
        while pos + recsize <= total:
            record = _unpack(indexformatv0, data[pos:pos + recsize])
            pos += recsize
            offset, complen, baserev, linkrev, p1node, p2node, nodeid = record
            # transform to revlogv1 format: -1 stands in for the
            # uncompressed length and parents become revision numbers
            entries.append((
                offset_type(offset, 0),
                complen,
                -1,
                baserev,
                linkrev,
                nodemap.get(p1node, nullrev),
                nodemap.get(p2node, nullrev),
                nodeid,
            ))
            nodemap[nodeid] = revnum
            revnum += 1

        # add the magic null revision at -1
        entries.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return entries, nodemap, None

    def packentry(self, entry, node, version, rev):
        """Pack a revlogv1-style ``entry`` into a v0 index record.

        ``node`` is a callable mapping a revision number to its nodeid.
        ``version`` and ``rev`` are not used here. Raises RevlogError when
        the entry carries index flags.
        """
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        fields = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return _pack(indexformatv0, *fields)

194

195

# index ng record layout (big-endian):
#   6 bytes: offset
#   2 bytes: flags
#   4 bytes: compressed length
#   4 bytes: uncompressed length
#   4 bytes: base rev
#   4 bytes: link rev
#   4 bytes: parent 1 rev
#   4 bytes: parent 2 rev
#  32 bytes: nodeid (20 bytes of nodeid followed by 12 bytes of padding)
indexformatng = ">Qiiiiii20s12x"
versionformat = ">I"

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

211

212

class revlogio(object):
    """Reader/writer for index records in the ng format (indexformatng)."""

    def __init__(self):
        # size in bytes of one packed ng index record
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, data, inline):
        """Parse raw index ``data``; return ``(index, nodemap, cache)``.

        The parsing is delegated to ``parsers.parse_index2``. ``nodemap``
        is the index's own ``nodemap`` attribute when it has one, else
        None.
        """
        parsed = parsers.parse_index2(data, inline)
        index, cache = parsed
        nodemap = getattr(index, 'nodemap', None)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        """Pack ``entry`` into an ng index record.

        For revision 0 the first four bytes of the record are replaced by
        the packed ``version``. ``node`` is not used here.
        """
        packed = _pack(indexformatng, *entry)
        if rev != 0:
            return packed
        return _pack(versionformat, version) + packed[4:]

226

227

class revlog(object):

227

class revlog(object):

228

"""

228

"""

229

the underlying revision storage object

229

the underlying revision storage object

230

231

A revlog consists of two parts, an index and the revision data.

231

A revlog consists of two parts, an index and the revision data.

232

233

The index is a file with a fixed record size containing

233

The index is a file with a fixed record size containing

234

information on each revision, including its nodeid (hash), the

234

information on each revision, including its nodeid (hash), the

235

nodeids of its parents, the position and offset of its data within

235

nodeids of its parents, the position and offset of its data within

236

the data file, and the revision it's based on. Finally, each entry

236

the data file, and the revision it's based on. Finally, each entry

237

contains a linkrev entry that can serve as a pointer to external

237

contains a linkrev entry that can serve as a pointer to external

238

data.

238

data.

239

240

The revision data itself is a linear collection of data chunks.

240

The revision data itself is a linear collection of data chunks.

241

Each chunk represents a revision and is usually represented as a

241

Each chunk represents a revision and is usually represented as a

242

delta against the previous chunk. To bound lookup time, runs of

242

delta against the previous chunk. To bound lookup time, runs of

243

deltas are limited to about 2 times the length of the original

243

deltas are limited to about 2 times the length of the original

244

version data. This makes retrieval of a version proportional to

244

version data. This makes retrieval of a version proportional to

245

its size, or O(1) relative to the number of revisions.

245

its size, or O(1) relative to the number of revisions.

246

247

Both pieces of the revlog are written to in an append-only

247

Both pieces of the revlog are written to in an append-only

248

fashion, which means we never need to rewrite a file to insert or

248

fashion, which means we never need to rewrite a file to insert or

249

remove data, and can use some simple techniques to avoid the need

249

remove data, and can use some simple techniques to avoid the need

250

for locking while reading.

250

for locking while reading.

251

252

If checkambig, indexfile is opened with checkambig=True at

252

If checkambig, indexfile is opened with checkambig=True at

253

writing, to avoid file stat ambiguity.

253

writing, to avoid file stat ambiguity.

254

"""

254

"""

255

def __init__(self, opener, indexfile, checkambig=False):
    """
    create a revlog object

    opener is a function that abstracts the file opening operation
    and can be used to implement COW semantics or the like.

    indexfile is the name of the index file; the data file name is
    derived from it by replacing its last two characters with ".d".

    Raises RevlogError when the chunk cache size is not a positive power
    of 2, when the version header holds an unknown format or flags, or
    when the index cannot be parsed.
    """
    self.indexfile = indexfile
    self.datafile = indexfile[:-2] + ".d"
    self.opener = opener
    # When True, indexfile is opened with checkambig=True at writing, to
    # avoid file stat ambiguity.
    self._checkambig = checkambig
    # 3-tuple of (node, rev, text) for a raw revision.
    self._cache = None
    # Maps rev to chain base rev.
    self._chainbasecache = util.lrucachedict(100)
    # 2-tuple of (offset, data) of raw data from the revlog at an offset.
    self._chunkcache = (0, '')
    # How much data to read and cache into the raw revlog data cache.
    self._chunkcachesize = 65536
    self._maxchainlen = None
    self._aggressivemergedeltas = False
    # Always defined, even when the opener carries no options (it used to
    # be assigned only inside the options branch below).
    self._lazydeltabase = False
    self.index = []
    # Mapping of partial identifiers to full nodes.
    self._pcache = {}
    # Mapping of full node to revision integer.
    self._nodecache = {nullid: nullrev}
    self._nodepos = None
    self._compengine = 'zlib'

    v = REVLOG_DEFAULT_VERSION
    opts = getattr(opener, 'options', None)
    if opts is not None:
        if 'revlogv1' in opts:
            if 'generaldelta' in opts:
                v |= REVLOGGENERALDELTA
        else:
            v = 0
        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'aggressivemergedeltas' in opts:
            self._aggressivemergedeltas = opts['aggressivemergedeltas']
        self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']

    if self._chunkcachesize <= 0:
        raise RevlogError(_('revlog chunk cache size %r is not greater '
                            'than 0') % self._chunkcachesize)
    elif self._chunkcachesize & (self._chunkcachesize - 1):
        raise RevlogError(_('revlog chunk cache size %r is not a power '
                            'of 2') % self._chunkcachesize)

    indexdata = ''
    self._initempty = True
    try:
        f = self.opener(self.indexfile)
        try:
            indexdata = f.read()
        finally:
            # close the handle even when the read fails
            f.close()
        if len(indexdata) > 0:
            # an existing index dictates the version through its header
            v = struct.unpack(versionformat, indexdata[:4])[0]
            self._initempty = False
    except IOError as inst:
        # a missing index file just means an empty revlog
        if inst.errno != errno.ENOENT:
            raise

    self.version = v
    self._inline = v & REVLOGNGINLINEDATA
    self._generaldelta = v & REVLOGGENERALDELTA
    # high bits are feature flags, low 16 bits are the format number
    flags = v & ~0xFFFF
    fmt = v & 0xFFFF
    if fmt == REVLOGV0 and flags:
        raise RevlogError(_("index %s unknown flags %#04x for format v0")
                          % (self.indexfile, flags >> 16))
    elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
        raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                          % (self.indexfile, flags >> 16))
    elif fmt > REVLOGNG:
        raise RevlogError(_("index %s unknown format %d")
                          % (self.indexfile, fmt))

    self.storedeltachains = True

    self._io = revlogio()
    if self.version == REVLOGV0:
        self._io = revlogoldio()
    try:
        d = self._io.parseindex(indexdata, self._inline)
    except (ValueError, IndexError):
        raise RevlogError(_("index %s is corrupted") % (self.indexfile))
    self.index, nodemap, self._chunkcache = d
    if nodemap is not None:
        self.nodemap = self._nodecache = nodemap
    if not self._chunkcache:
        self._chunkclear()
    # revnum -> (chain-length, sum-delta-length)
    self._chaininfocache = {}
    # revlog header -> revlog compressor
    self._decompressors = {}

357

358

@util.propertycache
def _compressor(self):
    """Return a revlog compressor from the configured compression engine."""
    engine = util.compengines[self._compengine]
    return engine.revlogcompressor()

361

362

def tip(self):
    """Return the node of the second-to-last index entry."""
    # the final index entry is skipped, hence the -2
    tiprev = len(self.index) - 2
    return self.node(tiprev)

364

def __contains__(self, rev):
    """Return True when ``rev`` lies in ``[0, len(self))``."""
    count = len(self)
    return 0 <= rev < count

366

def __len__(self):
    """Return the number of index entries minus one."""
    entries = len(self.index)
    return entries - 1

368

def __iter__(self):
    """Iterate over revision numbers from 0 up to ``len(self) - 1``."""
    allrevs = xrange(len(self))
    return iter(allrevs)

370

def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)

    ``stop`` is inclusive. When it is smaller than ``start`` the
    iteration runs backwards. When it is None the iteration runs forward
    up to the last revision.
    """
    if stop is None:
        return xrange(start, len(self), 1)
    step = -1 if start > stop else 1
    # push the bound one step further so that ``stop`` itself is included
    return xrange(start, stop + step, step)

380

381

@util.propertycache
def nodemap(self):
    """Return the node cache after looking up the node of revision 0."""
    firstnode = self.node(0)
    # result discarded; the lookup is made before handing out the cache
    self.rev(firstnode)
    return self._nodecache

385

386

def hasnode(self, node):
    """Return True if ``node`` can be resolved by ``rev()``, else False."""
    try:
        self.rev(node)
    except KeyError:
        return False
    else:
        return True

392

393

def clearcaches(self):
    """Reset the revision, chain base, chunk, partial-id and node caches."""
    self._cache = None
    self._pcache = {}
    self._chunkcache = (0, '')
    self._chainbasecache.clear()

    try:
        # let the node cache clear itself when it knows how to
        self._nodecache.clearcaches()
    except AttributeError:
        # otherwise start over from a fresh mapping and scan position
        self._nodecache = {nullid: nullrev}
        self._nodepos = None

404

405

def rev(self, node):
    """Return the revision number of ``node``.

    The node cache is consulted first. When it raises KeyError the index
    is scanned backwards, filling the cache along the way. Raises
    LookupError when the node is not found.
    """
    try:
        return self._nodecache[node]
    except TypeError:
        raise
    except RevlogError:
        # parsers.c radix tree lookup failed
        raise LookupError(node, self.indexfile, _('no node'))
    except KeyError:
        # pure python cache lookup failed
        cache = self._nodecache
        index = self.index
        startrev = self._nodepos
        if startrev is None:
            startrev = len(index) - 2
        for candidate in xrange(startrev, -1, -1):
            seen = index[candidate][7]
            cache[seen] = candidate
            if seen == node:
                # remember where the scan stopped
                self._nodepos = candidate - 1
                return candidate
        raise LookupError(node, self.indexfile, _('no node'))

427

428

# Accessors for index entries.

428

# Accessors for index entries.

429

430

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

430

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes

431

# are flags.

431

# are flags.

432

def start(self, rev):
    """Return the data offset of ``rev``.

    The offset is the part of the first index field above its low 16
    bits.
    """
    packed = self.index[rev][0]
    return int(packed >> 16)

434

435

def flags(self, rev):
    """Return the flags of ``rev`` (low 16 bits of the first index field)."""
    packed = self.index[rev][0]
    return packed & 0xFFFF

437

438

def length(self, rev):
    """Return the second index field of ``rev`` (compressed length)."""
    entry = self.index[rev]
    return entry[1]

440

441

def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    recorded = self.index[rev][2]
    if recorded < 0:
        # no usable length in the index: measure the revision text
        return len(self.revision(rev))
    return recorded

449

size = rawsize

449

size = rawsize

450

451

def chainbase(self, rev):
    """Return the base revision of the delta chain that ``rev`` is on.

    Base pointers (index field 3) are followed until an entry names
    itself as its own base. The result is cached under the revision that
    was asked for.
    """
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    # Walk with a separate cursor so ``rev`` still names the requested
    # revision when the result is stored in the cache below.
    iterrev = rev
    base = index[iterrev][3]
    while base != iterrev:
        iterrev = base
        base = index[iterrev][3]

    self._chainbasecache[rev] = base
    return base

464

465

def linkrev(self, rev):
    """Return the link revision of ``rev`` (index field 4)."""
    entry = self.index[rev]
    return entry[4]

467

468

def parentrevs(self, rev):
    """Return the two parent revisions of ``rev`` (index fields 5 and 6)."""
    entry = self.index[rev]
    return entry[5:7]

470

471

def node(self, rev):
    """Return the nodeid of ``rev`` (index field 7)."""
    entry = self.index[rev]
    return entry[7]

473

474

# Derived from index values.

474

# Derived from index values.

475

476

def end(self, rev):
    """Return the offset just past the data of ``rev`` (start + length)."""
    begin = self.start(rev)
    size = self.length(rev)
    return begin + size

478

479

def parents(self, node):
    """Return the nodeids of both parents of ``node`` as a 2-tuple."""
    index = self.index
    entry = index[self.rev(node)]
    # map the parent revisions (fields 5 and 6) to their nodes inline
    p1node = index[entry[5]][7]
    p2node = index[entry[6]][7]
    return p1node, p2node

483

484

def chainlen(self, rev):
    """Return the delta chain length of ``rev`` (first item of _chaininfo)."""
    info = self._chaininfo(rev)
    return info[0]

486

487

def _chaininfo(self, rev):
    """Return ``(chain length, summed compressed length)`` for ``rev``.

    The chain is walked back to its base, following base pointers when
    generaldelta is on and previous revisions otherwise. Results are
    memoized in ``self._chaininfocache``, and a cached result met on the
    way ends the walk early.
    """
    cache = self._chaininfocache
    if rev in cache:
        return cache[rev]

    index = self.index
    followbase = self._generaldelta
    cursor = rev
    entry = index[cursor]
    length = 0
    deltabytes = 0
    while True:
        if cursor == entry[3]:
            # Reached the chain base. Add text length of base since
            # decompressing that also takes work. For cache hits the
            # length is already included.
            deltabytes += entry[1]
            break
        length += 1
        deltabytes += entry[1]
        if followbase:
            cursor = entry[3]
        else:
            cursor -= 1
        if cursor in cache:
            known = cache[cursor]
            length += known[0]
            deltabytes += known[1]
            break
        entry = index[cursor]

    info = (length, deltabytes)
    cache[rev] = info
    return info

517

518

def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` names a revision at which the walk ends early.  Without it
    the walk continues down to the base of the chain.

    Returns a 2-tuple of (chain, stopped): ``chain`` is the list of visited
    revs in ascending order and ``stopped`` tells whether ``stoprev`` was
    reached.  When it was, ``stoprev`` itself is not part of ``chain``.
    """
    # Aliases to prevent attribute lookups in the tight loop.
    index = self.index
    followbase = self._generaldelta

    visited = []
    cur = rev
    entry = index[cur]
    while cur != entry[3] and cur != stoprev:
        visited.append(cur)
        cur = entry[3] if followbase else cur - 1
        entry = index[cur]

    stopped = cur == stoprev
    if not stopped:
        # the walk ended on the chain base, which belongs to the chain
        visited.append(cur)

    visited.reverse()
    return visited, stopped

552

553

def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse topological order.

    Revisions lower than stoprev are not generated.

    This delegates to ancestor.lazyancestors; see its documentation for
    more details."""
    parentfn = self.parentrevs
    return ancestor.lazyancestors(parentfn, revs, stoprev=stoprev,
                                  inclusive=inclusive)

561

562

def descendants(self, revs):
    """Generate the descendants of 'revs' in revision order.

    Yields revision numbers, beginning with a child of some rev in revs;
    a revision is *not* considered a descendant of itself.  Results come
    out ordered by revision number (a topological sort).  When nullrev is
    the smallest entry of revs, every revision of the revlog is yielded."""
    first = min(revs)
    if first == nullrev:
        for rev in self:
            yield rev
        return

    seen = set(revs)
    parentrevs = self.parentrevs
    for rev in self.revs(start=first + 1):
        # a rev qualifies as soon as one real parent is already known
        if any(p != nullrev and p in seen for p in parentrevs(rev)):
            seen.add(rev)
            yield rev

582

583

def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

      ::common, (::heads) - (::common)

    The first element is a lazy set-like object holding revision numbers.
    The second is a list of nodes sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            if value in self.addedvalues:
                return True
            return value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if r not in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(commonrevs))
    has.add(nullrev)
    has.update(commonrevs)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in headrevs if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        missing.add(r)
        visit.extend(p for p in self.parentrevs(r) if p not in has)
    return has, [self.node(r) for r in sorted(missing)]

646

647

def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. The object is an
    ancestor.incrementalmissingancestors instance built on this revlog's
    parentrevs.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    bases = [nullrev] if common is None else common
    return ancestor.incrementalmissingancestors(self.parentrevs, bases)

660

661

def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers.  If heads is
    not supplied, uses all of the revlog's head revisions.  If common is
    not supplied, uses nullrev."""
    bases = [nullrev] if common is None else common
    tips = self.headrevs() if heads is None else heads

    inc = self.incrementalmissingrevs(common=bases)
    return inc.missingancestors(tips)

684

685

def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

      1. N is an ancestor of some node in 'heads'
      2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs.  If heads is
    not supplied, uses all of the revlog's heads.  If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # the incremental helper works on revision numbers, not nodes
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    inc = self.incrementalmissingrevs(common=commonrevs)
    missingrevs = inc.missingancestors(headrevs)
    return [self.node(r) for r in missingrevs]

710

711

def nodesbetween(self, roots=None, heads=None):
    """Return a topological path from 'roots' to 'heads'.

    Return a tuple (nodes, outroots, outheads) where 'nodes' is a
    topologically sorted list of all nodes N that satisfy both of
    these constraints:

      1. N is a descendant of some node in 'roots'
      2. N is an ancestor of some node in 'heads'

    Every node is considered to be both a descendant and an ancestor
    of itself, so every reachable node in 'roots' and 'heads' will be
    included in 'nodes'.

    'outroots' is the list of reachable nodes in 'roots', i.e., the
    subset of 'roots' that is returned in 'nodes'.  Likewise,
    'outheads' is the subset of 'heads' that is also in 'nodes'.

    'roots' and 'heads' are both lists of node IDs.  If 'roots' is
    unspecified, uses nullid as the only root.  If 'heads' is
    unspecified, uses list of all of the revlog's heads.

    When nothing qualifies, a tuple of three empty lists is returned."""
    nonodes = ([], [], [])
    if roots is not None:
        roots = list(roots)
        if not roots:
            return nonodes
        lowestrev = min([self.rev(n) for n in roots])
    else:
        roots = [nullid] # Everybody's a descendant of nullid
        lowestrev = nullrev
    if (lowestrev == nullrev) and (heads is None):
        # We want _all_ the nodes!
        return ([self.node(r) for r in self], [nullid], list(self.heads()))
    if heads is None:
        # All nodes are ancestors, so the latest ancestor is the last
        # node.
        highestrev = len(self) - 1
        # Set ancestors to None to signal that every node is an ancestor.
        ancestors = None
        # Set heads to an empty dictionary for later discovery of heads
        heads = {}
    else:
        heads = list(heads)
        if not heads:
            return nonodes
        ancestors = set()
        # Turn heads into a dictionary so we can remove 'fake' heads.
        # Also, later we will be using it to filter out the heads we can't
        # find from roots.
        heads = dict.fromkeys(heads, False)
        # Start at the top and keep marking parents until we're done.
        nodestotag = set(heads)
        # Remember where the top was so we can use it as a limit later.
        highestrev = max([self.rev(n) for n in nodestotag])
        while nodestotag:
            # grab a node to tag
            n = nodestotag.pop()
            # Never tag nullid
            if n == nullid:
                continue
            # A node's revision number represents its place in a
            # topologically sorted list of nodes.
            r = self.rev(n)
            if r >= lowestrev:
                if n not in ancestors:
                    # If we are possibly a descendant of one of the roots
                    # and we haven't already been marked as an ancestor
                    ancestors.add(n) # Mark as ancestor
                    # Add non-nullid parents to list of nodes to tag.
                    nodestotag.update([p for p in self.parents(n) if
                                       p != nullid])
                elif n in heads: # We've seen it before, is it a fake head?
                    # So it is, real heads should not be the ancestors of
                    # any other heads.
                    heads.pop(n)
        if not ancestors:
            return nonodes
        # Now that we have our set of ancestors, we want to remove any
        # roots that are not ancestors.

        # If one of the roots was nullid, everything is included anyway.
        if lowestrev > nullrev:
            # But, since we weren't, let's recompute the lowest rev to not
            # include roots that aren't ancestors.

            # Filter out roots that aren't ancestors of heads
            roots = [root for root in roots if root in ancestors]
            # Recompute the lowest revision
            if roots:
                lowestrev = min([self.rev(root) for root in roots])
            else:
                # No more roots?  Return empty list
                return nonodes
        else:
            # We are descending from nullid, and don't need to care about
            # any other roots.
            lowestrev = nullrev
            roots = [nullid]
    # Transform our roots list into a set.
    descendants = set(roots)
    # Also, keep the original roots so we can filter out roots that aren't
    # 'real' roots (i.e. are descended from other roots).
    roots = descendants.copy()
    # Our topologically sorted list of output nodes.
    orderedout = []
    # Don't start at nullid since we don't want nullid in our output list,
    # and if nullid shows up in descendants, empty parents will look like
    # they're descendants.
    for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
        n = self.node(r)
        isdescendant = False
        if lowestrev == nullrev:  # Everybody is a descendant of nullid
            isdescendant = True
        elif n in descendants:
            # n is already a descendant
            isdescendant = True
            # This check only needs to be done here because all the roots
            # will start being marked as descendants before the loop.
            if n in roots:
                # If n was a root, check if it's a 'real' root.
                p = tuple(self.parents(n))
                # If any of its parents are descendants, it's not a root.
                if (p[0] in descendants) or (p[1] in descendants):
                    roots.remove(n)
        else:
            p = tuple(self.parents(n))
            # A node is a descendant if either of its parents are
            # descendants.  (We seeded the descendants set with the roots
            # up there, remember?)
            if (p[0] in descendants) or (p[1] in descendants):
                descendants.add(n)
                isdescendant = True
        if isdescendant and ((ancestors is None) or (n in ancestors)):
            # Only include nodes that are both descendants and ancestors.
            orderedout.append(n)
            if (ancestors is not None) and (n in heads):
                # We're trying to figure out which heads are reachable
                # from roots.
                # Mark this head as having been reached
                heads[n] = True
            elif ancestors is None:
                # Otherwise, we're trying to discover the heads.
                # Assume this is a head because if it isn't, the next step
                # will eventually remove it.
                heads[n] = True
                # But, obviously its parents aren't.
                for p in self.parents(n):
                    heads.pop(p, None)
    # items() rather than iteritems(): the latter only exists on Python 2
    # dictionaries, while items() is available everywhere.
    heads = [head for head, flag in heads.items() if flag]
    roots = list(roots)
    assert orderedout
    assert roots
    assert heads
    return (orderedout, roots, heads)

865

866

def headrevs(self):
    """return the head revisions of this revlog

    The index object's own ``headrevs`` is used when the call succeeds.  An
    AttributeError from that call makes this fall back to
    ``self._headrevs()``.
    """
    try:
        revs = self.index.headrevs()
    except AttributeError:
        revs = self._headrevs()
    return revs

871

872

def computephases(self, roots):
    """return ``self.index.computephasesmapsets(roots)`` unchanged"""
    compute = self.index.computephasesmapsets
    return compute(roots)

874

875

def _headrevs(self):
    """return the head revisions by scanning the index entries

    Every revision produced by iterating over ``self`` is first taken to
    be a head, then both of its parents (fields 5 and 6 of its index entry)
    are struck off.  An empty revlog yields ``[nullrev]``.
    """
    total = len(self)
    if not total:
        return [nullrev]
    # we won't iter over filtered rev so nobody is a head at start.
    # The list has one spare slot at the end; a parent of -1 indexes it.
    flags = [0] * (total + 1)
    index = self.index
    for rev in self:
        flags[rev] = 1  # I may be a head
        entry = index[rev]
        # my parents are not
        flags[entry[5]] = 0
        flags[entry[6]] = 0
    return [rev for rev, ishead in enumerate(flags) if ishead]

887

888

def heads(self, start=None, stop=None):
    """return the list of all nodes that have no children

    if start is specified, only heads that are descendants of
    start will be returned
    if stop is specified, it will consider all the revs from stop
    as if they had no children
    """
    if start is None and stop is None:
        if not len(self):
            return [nullid]
        return [self.node(r) for r in self.headrevs()]

    startnode = nullid if start is None else start
    stopnodes = [] if stop is None else stop
    stoprevs = set(self.rev(n) for n in stopnodes)
    startrev = self.rev(startnode)
    reachable = set([startrev])
    tips = set([startrev])

    parentrevs = self.parentrevs
    for r in self.revs(start=startrev + 1):
        for p in parentrevs(r):
            if p in reachable:
                # r descends from start; it only extends the reachable
                # set when it is not one of the stop revisions
                if r not in stoprevs:
                    reachable.add(r)
                tips.add(r)
            if p not in stoprevs:
                # p has a child, so it is no longer a head
                tips.discard(p)

    return [self.node(r) for r in tips]

921

922

def children(self, node):
    """find the children of a given node"""
    target = self.rev(node)
    found = []
    for r in self.revs(start=target + 1):
        realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
        if not realparents:
            # a rev without real parents is a child of the null revision
            if target == nullrev:
                found.append(self.node(r))
            continue
        for pr in realparents:
            if pr == target:
                found.append(self.node(r))
    return found

935

936

def descendant(self, start, end):
    """return True if rev ``end`` is among the descendants of rev ``start``

    nullrev as ``start`` always gives True.  Otherwise the revisions from
    ``self.descendants([start])`` are scanned, and the scan stops at the
    first one that is not smaller than ``end``.
    """
    if start == nullrev:
        return True
    for rev in self.descendants([start]):
        if rev >= end:
            # either we found it, or we have already walked past it
            return rev == end
    return False

945

946

def commonancestorsheads(self, a, b):
    """calculate all the heads of the common ancestors of nodes a and b"""
    reva = self.rev(a)
    revb = self.rev(b)
    try:
        headrevs = self.index.commonancestorsheads(reva, revb)
    except (AttributeError, OverflowError): # C implementation failed
        headrevs = ancestor.commonancestorsheads(self.parentrevs,
                                                 reva, revb)
    return pycompat.maplist(self.node, headrevs)

954

955

def isancestor(self, a, b):
    """return True if node a is an ancestor of node b

    The check itself is a simple membership test; the real work happens in
    commonancestorsheads."""
    commonheads = self.commonancestorsheads(a, b)
    return a in commonheads

961

962

def ancestor(self, a, b):

962

def ancestor(self, a, b):

963

"""calculate the "best" common ancestor of nodes a and b"""

963

"""calculate the "best" common ancestor of nodes a and b"""

964

965

a, b = self.rev(a), self.rev(b)

965

a, b = self.rev(a), self.rev(b)

966

try:

966

try:

967

ancs = self.index.ancestors(a, b)

967

ancs = self.index.ancestors(a, b)

968

except (AttributeError, OverflowError):

968

except (AttributeError, OverflowError):

969

ancs = ancestor.ancestors(self.parentrevs, a, b)

969

ancs = ancestor.ancestors(self.parentrevs, a, b)

970

if ancs:

970

if ancs:

971

# choose a consistent winner when there's a tie

971

# choose a consistent winner when there's a tie

972

return min(map(self.node, ancs))

972

return min(map(self.node, ancs))

973

return nullid

973

return nullid

974

975

def _match(self, id):

975

def _match(self, id):

976

if isinstance(id, int):

976

if isinstance(id, int):

977

# rev

977

# rev

978

return self.node(id)

978

return self.node(id)

979

if len(id) == 20:

979

if len(id) == 20:

980

# possibly a binary node

980

# possibly a binary node

981

# odds of a binary node being all hex in ASCII are 1 in 10**25

981

# odds of a binary node being all hex in ASCII are 1 in 10**25

982

try:

982

try:

983

node = id

983

node = id

984

self.rev(node) # quick search the index

984

self.rev(node) # quick search the index

985

return node

985

return node

986

except LookupError:

986

except LookupError:

987

pass # may be partial hex id

987

pass # may be partial hex id

988

try:

988

try:

989

# str(rev)

989

# str(rev)

990

rev = int(id)

990

rev = int(id)

991

if str(rev) != id:

991

if str(rev) != id:

992

raise ValueError

992

raise ValueError

993

if rev < 0:

993

if rev < 0:

994

rev = len(self) + rev

994

rev = len(self) + rev

995

if rev < 0 or rev >= len(self):

995

if rev < 0 or rev >= len(self):

996

raise ValueError

996

raise ValueError

997

return self.node(rev)

997

return self.node(rev)

998

except (ValueError, OverflowError):

998

except (ValueError, OverflowError):

999

pass

999

pass

1000

if len(id) == 40:

1000

if len(id) == 40:

1001

try:

1001

try:

1002

# a full hex nodeid?

1002

# a full hex nodeid?

1003

node = bin(id)

1003

node = bin(id)

1004

self.rev(node)

1004

self.rev(node)

1005

return node

1005

return node

1006

except (TypeError, LookupError):

1006

except (TypeError, LookupError):

1007

pass

1007

pass

1008

1009

def _partialmatch(self, id):

1009

def _partialmatch(self, id):

1010

try:

1010

try:

1011

partial = self.index.partialmatch(id)

1011

partial = self.index.partialmatch(id)

1012

if partial and self.hasnode(partial):

1012

if partial and self.hasnode(partial):

1013

return partial

1013

return partial

1014

return None

1014

return None

1015

except RevlogError:

1015

except RevlogError:

1016

# parsers.c radix tree lookup gave multiple matches

1016

# parsers.c radix tree lookup gave multiple matches

1017

# fast path: for unfiltered changelog, radix tree is accurate

1017

# fast path: for unfiltered changelog, radix tree is accurate

1018

if not getattr(self, 'filteredrevs', None):

1018

if not getattr(self, 'filteredrevs', None):

1019

raise LookupError(id, self.indexfile,

1019

raise LookupError(id, self.indexfile,

1020

_('ambiguous identifier'))

1020

_('ambiguous identifier'))

1021

# fall through to slow path that filters hidden revisions

1021

# fall through to slow path that filters hidden revisions

1022

except (AttributeError, ValueError):

1022

except (AttributeError, ValueError):

1023

# we are pure python, or key was too short to search radix tree

1023

# we are pure python, or key was too short to search radix tree

1024

pass

1024

pass

1025

1026

if id in self._pcache:

1026

if id in self._pcache:

1027

return self._pcache[id]

1027

return self._pcache[id]

1028

1029

if len(id) < 40:

1029

if len(id) < 40:

1030

try:

1030

try:

1031

# hex(node)[:...]

1031

# hex(node)[:...]

1032

l = len(id) // 2 # grab an even number of digits

1032

l = len(id) // 2 # grab an even number of digits

1033

prefix = bin(id[:l * 2])

1033

prefix = bin(id[:l * 2])

1034

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1034

nl = [e[7] for e in self.index if e[7].startswith(prefix)]

1035

nl = [n for n in nl if hex(n).startswith(id) and

1035

nl = [n for n in nl if hex(n).startswith(id) and

1036

self.hasnode(n)]

1036

self.hasnode(n)]

1037

if len(nl) > 0:

1037

if len(nl) > 0:

1038

if len(nl) == 1:

1038

if len(nl) == 1:

1039

self._pcache[id] = nl[0]

1039

self._pcache[id] = nl[0]

1040

return nl[0]

1040

return nl[0]

1041

raise LookupError(id, self.indexfile,

1041

raise LookupError(id, self.indexfile,

1042

_('ambiguous identifier'))

1042

_('ambiguous identifier'))

1043

return None

1043

return None

1044

except TypeError:

1044

except TypeError:

1045

pass

1045

pass

1046

1047

def lookup(self, id):

1047

def lookup(self, id):

1048

"""locate a node based on:

1048

"""locate a node based on:

1049

- revision number or str(revision number)

1049

- revision number or str(revision number)

1050

- nodeid or subset of hex nodeid

1050

- nodeid or subset of hex nodeid

1051

"""

1051

"""

1052

n = self._match(id)

1052

n = self._match(id)

1053

if n is not None:

1053

if n is not None:

1054

return n

1054

return n

1055

n = self._partialmatch(id)

1055

n = self._partialmatch(id)

1056

if n:

1056

if n:

1057

return n

1057

return n

1058

1059

raise LookupError(id, self.indexfile, _('no match found'))

1059

raise LookupError(id, self.indexfile, _('no match found'))

1060

1061

def cmp(self, node, text):

1061

def cmp(self, node, text):

1062

"""compare text with a given file revision

1062

"""compare text with a given file revision

1063

1064

returns True if text is different than what is stored.

1064

returns True if text is different than what is stored.

1065

"""

1065

"""

1066

p1, p2 = self.parents(node)

1066

p1, p2 = self.parents(node)

1067

return hash(text, p1, p2) != node

1067

return hash(text, p1, p2) != node

1068

1069

def _addchunk(self, offset, data):

1069

def _addchunk(self, offset, data):

1070

"""Add a segment to the revlog cache.

1070

"""Add a segment to the revlog cache.

1071

1072

Accepts an absolute offset and the data that is at that location.

1072

Accepts an absolute offset and the data that is at that location.

1073

"""

1073

"""

1074

o, d = self._chunkcache

1074

o, d = self._chunkcache

1075

# try to add to existing cache

1075

# try to add to existing cache

1076

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1076

if o + len(d) == offset and len(d) + len(data) < _chunksize:

1077

self._chunkcache = o, d + data

1077

self._chunkcache = o, d + data

1078

else:

1078

else:

1079

self._chunkcache = offset, data

1079

self._chunkcache = offset, data

1080

1081

def _loadchunk(self, offset, length, df=None):

1081

def _loadchunk(self, offset, length, df=None):

1082

"""Load a segment of raw data from the revlog.

1082

"""Load a segment of raw data from the revlog.

1083

1084

Accepts an absolute offset, length to read, and an optional existing

1084

Accepts an absolute offset, length to read, and an optional existing

1085

file handle to read from.

1085

file handle to read from.

1086

1087

If an existing file handle is passed, it will be seeked and the

1087

If an existing file handle is passed, it will be seeked and the

1088

original seek position will NOT be restored.

1088

original seek position will NOT be restored.

1089

1090

Returns a str or buffer of raw byte data.

1090

Returns a str or buffer of raw byte data.

1091

"""

1091

"""

1092

if df is not None:

1092

if df is not None:

1093

closehandle = False

1093

closehandle = False

1094

else:

1094

else:

1095

if self._inline:

1095

if self._inline:

1096

df = self.opener(self.indexfile)

1096

df = self.opener(self.indexfile)

1097

else:

1097

else:

1098

df = self.opener(self.datafile)

1098

df = self.opener(self.datafile)

1099

closehandle = True

1099

closehandle = True

1100

1101

# Cache data both forward and backward around the requested

1101

# Cache data both forward and backward around the requested

1102

# data, in a fixed size window. This helps speed up operations

1102

# data, in a fixed size window. This helps speed up operations

1103

# involving reading the revlog backwards.

1103

# involving reading the revlog backwards.

1104

cachesize = self._chunkcachesize

1104

cachesize = self._chunkcachesize

1105

realoffset = offset & ~(cachesize - 1)

1105

realoffset = offset & ~(cachesize - 1)

1106

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1106

reallength = (((offset + length + cachesize) & ~(cachesize - 1))

1107

- realoffset)

1107

- realoffset)

1108

df.seek(realoffset)

1108

df.seek(realoffset)

1109

d = df.read(reallength)

1109

d = df.read(reallength)

1110

if closehandle:

1110

if closehandle:

1111

df.close()

1111

df.close()

1112

self._addchunk(realoffset, d)

1112

self._addchunk(realoffset, d)

1113

if offset != realoffset or reallength != length:

1113

if offset != realoffset or reallength != length:

1114

return util.buffer(d, offset - realoffset, length)

1114

return util.buffer(d, offset - realoffset, length)

1115

return d

1115

return d

1116

1117

def _getchunk(self, offset, length, df=None):

1117

def _getchunk(self, offset, length, df=None):

1118

"""Obtain a segment of raw data from the revlog.

1118

"""Obtain a segment of raw data from the revlog.

1119

1120

Accepts an absolute offset, length of bytes to obtain, and an

1120

Accepts an absolute offset, length of bytes to obtain, and an

1121

optional file handle to the already-opened revlog. If the file

1121

optional file handle to the already-opened revlog. If the file

1122

handle is used, it's original seek position will not be preserved.

1122

handle is used, it's original seek position will not be preserved.

1123

1124

Requests for data may be returned from a cache.

1124

Requests for data may be returned from a cache.

1125

1126

Returns a str or a buffer instance of raw byte data.

1126

Returns a str or a buffer instance of raw byte data.

1127

"""

1127

"""

1128

o, d = self._chunkcache

1128

o, d = self._chunkcache

1129

l = len(d)

1129

l = len(d)

1130

1131

# is it in the cache?

1131

# is it in the cache?

1132

cachestart = offset - o

1132

cachestart = offset - o

1133

cacheend = cachestart + length

1133

cacheend = cachestart + length

1134

if cachestart >= 0 and cacheend <= l:

1134

if cachestart >= 0 and cacheend <= l:

1135

if cachestart == 0 and cacheend == l:

1135

if cachestart == 0 and cacheend == l:

1136

return d # avoid a copy

1136

return d # avoid a copy

1137

return util.buffer(d, cachestart, cacheend - cachestart)

1137

return util.buffer(d, cachestart, cacheend - cachestart)

1138

1139

return self._loadchunk(offset, length, df=df)

1139

return self._loadchunk(offset, length, df=df)

1140

1141

def _chunkraw(self, startrev, endrev, df=None):

1141

def _chunkraw(self, startrev, endrev, df=None):

1142

"""Obtain a segment of raw data corresponding to a range of revisions.

1142

"""Obtain a segment of raw data corresponding to a range of revisions.

1143

1144

Accepts the start and end revisions and an optional already-open

1144

Accepts the start and end revisions and an optional already-open

1145

file handle to be used for reading. If the file handle is read, its

1145

file handle to be used for reading. If the file handle is read, its

1146

seek position will not be preserved.

1146

seek position will not be preserved.

1147

1148

Requests for data may be satisfied by a cache.

1148

Requests for data may be satisfied by a cache.

1149

1150

Returns a 2-tuple of (offset, data) for the requested range of

1150

Returns a 2-tuple of (offset, data) for the requested range of

1151

revisions. Offset is the integer offset from the beginning of the

1151

revisions. Offset is the integer offset from the beginning of the

1152

revlog and data is a str or buffer of the raw byte data.

1152

revlog and data is a str or buffer of the raw byte data.

1153

1154

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1154

Callers will need to call ``self.start(rev)`` and ``self.length(rev)``

1155

to determine where each revision's data begins and ends.

1155

to determine where each revision's data begins and ends.

1156

"""

1156

"""

1157

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1157

# Inlined self.start(startrev) & self.end(endrev) for perf reasons

1158

# (functions are expensive).

1158

# (functions are expensive).

1159

index = self.index

1159

index = self.index

1160

istart = index[startrev]

1160

istart = index[startrev]

1161

start = int(istart[0] >> 16)

1161

start = int(istart[0] >> 16)

1162

if startrev == endrev:

1162

if startrev == endrev:

1163

end = start + istart[1]

1163

end = start + istart[1]

1164

else:

1164

else:

1165

iend = index[endrev]

1165

iend = index[endrev]

1166

end = int(iend[0] >> 16) + iend[1]

1166

end = int(iend[0] >> 16) + iend[1]

1167

1168

if self._inline:

1168

if self._inline:

1169

start += (startrev + 1) * self._io.size

1169

start += (startrev + 1) * self._io.size

1170

end += (endrev + 1) * self._io.size

1170

end += (endrev + 1) * self._io.size

1171

length = end - start

1171

length = end - start

1172

1173

return start, self._getchunk(start, length, df=df)

1173

return start, self._getchunk(start, length, df=df)

1174

1175

def _chunk(self, rev, df=None):

1175

def _chunk(self, rev, df=None):

1176

"""Obtain a single decompressed chunk for a revision.

1176

"""Obtain a single decompressed chunk for a revision.

1177

1178

Accepts an integer revision and an optional already-open file handle

1178

Accepts an integer revision and an optional already-open file handle

1179

to be used for reading. If used, the seek position of the file will not

1179

to be used for reading. If used, the seek position of the file will not

1180

be preserved.

1180

be preserved.

1181

1182

Returns a str holding uncompressed data for the requested revision.

1182

Returns a str holding uncompressed data for the requested revision.

1183

"""

1183

"""

1184

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1184

return self.decompress(self._chunkraw(rev, rev, df=df)[1])

1185

1186

def _chunks(self, revs, df=None):

1186

def _chunks(self, revs, df=None):

1187

"""Obtain decompressed chunks for the specified revisions.

1187

"""Obtain decompressed chunks for the specified revisions.

1188

1189

Accepts an iterable of numeric revisions that are assumed to be in

1189

Accepts an iterable of numeric revisions that are assumed to be in

1190

ascending order. Also accepts an optional already-open file handle

1190

ascending order. Also accepts an optional already-open file handle

1191

to be used for reading. If used, the seek position of the file will

1191

to be used for reading. If used, the seek position of the file will

1192

not be preserved.

1192

not be preserved.

1193

1194

This function is similar to calling ``self._chunk()`` multiple times,

1194

This function is similar to calling ``self._chunk()`` multiple times,

1195

but is faster.

1195

but is faster.

1196

1197

Returns a list with decompressed data for each requested revision.

1197

Returns a list with decompressed data for each requested revision.

1198

"""

1198

"""

1199

if not revs:

1199

if not revs:

1200

return []

1200

return []

1201

start = self.start

1201

start = self.start

1202

length = self.length

1202

length = self.length

1203

inline = self._inline

1203

inline = self._inline

1204

iosize = self._io.size

1204

iosize = self._io.size

1205

buffer = util.buffer

1205

buffer = util.buffer

1206

1207

l = []

1207

l = []

1208

ladd = l.append

1208

ladd = l.append

1209

1210

try:

1210

try:

1211

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1211

offset, data = self._chunkraw(revs[0], revs[-1], df=df)

1212

except OverflowError:

1212

except OverflowError:

1213

# issue4215 - we can't cache a run of chunks greater than

1213

# issue4215 - we can't cache a run of chunks greater than

1214

# 2G on Windows

1214

# 2G on Windows

1215

return [self._chunk(rev, df=df) for rev in revs]

1215

return [self._chunk(rev, df=df) for rev in revs]

1216

1217

decomp = self.decompress

1217

decomp = self.decompress

1218

for rev in revs:

1218

for rev in revs:

1219

chunkstart = start(rev)

1219

chunkstart = start(rev)

1220

if inline:

1220

if inline:

1221

chunkstart += (rev + 1) * iosize

1221

chunkstart += (rev + 1) * iosize

1222

chunklength = length(rev)

1222

chunklength = length(rev)

1223

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1223

ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

1224

1225

return l

1225

return l

1226

1227

def _chunkclear(self):

1227

def _chunkclear(self):

1228

"""Clear the raw chunk cache."""

1228

"""Clear the raw chunk cache."""

1229

self._chunkcache = (0, '')

1229

self._chunkcache = (0, '')

1230

1231

def deltaparent(self, rev):

1231

def deltaparent(self, rev):

1232

"""return deltaparent of the given revision"""

1232

"""return deltaparent of the given revision"""

1233

base = self.index[rev][3]

1233

base = self.index[rev][3]

1234

if base == rev:

1234

if base == rev:

1235

return nullrev

1235

return nullrev

1236

elif self._generaldelta:

1236

elif self._generaldelta:

1237

return base

1237

return base

1238

else:

1238

else:

1239

return rev - 1

1239

return rev - 1

1240

1241

def revdiff(self, rev1, rev2):

1241

def revdiff(self, rev1, rev2):

1242

"""return or calculate a delta between two revisions~~"""~~

1242

"""return or calculate a delta between two revisions

1243

1244

The delta calculated is in binary form and is intended to be written to

1245

revlog data directly. So this function needs raw revision data.

1246

"""

1243

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1247

if rev1 != nullrev and self.deltaparent(rev2) == rev1:

1244

return bytes(self._chunk(rev2))

1248

return bytes(self._chunk(rev2))

1245

1249

1246

return mdiff.textdiff(self.revision(rev1),

1250

return mdiff.textdiff(self.revision(rev1, raw=True),

1247

self.revision(rev2))

1251

self.revision(rev2, raw=True))

1248

1252

1249

def revision(self, nodeorrev, _df=None, raw=False):

1253

def revision(self, nodeorrev, _df=None, raw=False):

1250

"""return an uncompressed revision of a given node or revision

1254

"""return an uncompressed revision of a given node or revision

1251

number.

1255

number.

1252

1256

1253

_df - an existing file handle to read from. (internal-only)

1257

_df - an existing file handle to read from. (internal-only)

1254

raw - an optional argument specifying if the revision data is to be

1258

raw - an optional argument specifying if the revision data is to be

1255

treated as raw data when applying flag transforms. 'raw' should be set

1259

treated as raw data when applying flag transforms. 'raw' should be set

1256

to True when generating changegroups or in debug commands.

1260

to True when generating changegroups or in debug commands.

1257

"""

1261

"""

1258

if isinstance(nodeorrev, int):

1262

if isinstance(nodeorrev, int):

1259

rev = nodeorrev

1263

rev = nodeorrev

1260

node = self.node(rev)

1264

node = self.node(rev)

1261

else:

1265

else:

1262

node = nodeorrev

1266

node = nodeorrev

1263

rev = None

1267

rev = None

1264

1268

1265

cachedrev = None

1269

cachedrev = None

1266

if node == nullid:

1270

if node == nullid:

1267

return ""

1271

return ""

1268

if self._cache:

1272

if self._cache:

1269

if self._cache[0] == node:

1273

if self._cache[0] == node:

1270

# _cache only stores rawtext

1274

# _cache only stores rawtext

1271

if raw:

1275

if raw:

1272

return self._cache[2]

1276

return self._cache[2]

1273

cachedrev = self._cache[1]

1277

cachedrev = self._cache[1]

1274

1278

1275

# look up what we need to read

1279

# look up what we need to read

1276

rawtext = None

1280

rawtext = None

1277

if rev is None:

1281

if rev is None:

1278

rev = self.rev(node)

1282

rev = self.rev(node)

1279

1283

1280

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1284

chain, stopped = self._deltachain(rev, stoprev=cachedrev)

1281

if stopped:

1285

if stopped:

1282

rawtext = self._cache[2]

1286

rawtext = self._cache[2]

1283

1287

1284

# drop cache to save memory

1288

# drop cache to save memory

1285

self._cache = None

1289

self._cache = None

1286

1290

1287

bins = self._chunks(chain, df=_df)

1291

bins = self._chunks(chain, df=_df)

1288

if rawtext is None:

1292

if rawtext is None:

1289

rawtext = bytes(bins[0])

1293

rawtext = bytes(bins[0])

1290

bins = bins[1:]

1294

bins = bins[1:]

1291

1295

1292

rawtext = mdiff.patches(rawtext, bins)

1296

rawtext = mdiff.patches(rawtext, bins)

1293

1297

1294

text, validatehash = self._processflags(rawtext, self.flags(rev),

1298

text, validatehash = self._processflags(rawtext, self.flags(rev),

1295

'read', raw=raw)

1299

'read', raw=raw)

1296

if validatehash:

1300

if validatehash:

1297

self.checkhash(text, node, rev=rev)

1301

self.checkhash(text, node, rev=rev)

1298

1302

1299

self._cache = (node, rev, rawtext)

1303

self._cache = (node, rev, rawtext)

1300

return text

1304

return text

1301

1305

1302

def hash(self, text, p1, p2):

1306

def hash(self, text, p1, p2):

1303

"""Compute a node hash.

1307

"""Compute a node hash.

1304

1308

1305

Available as a function so that subclasses can replace the hash

1309

Available as a function so that subclasses can replace the hash

1306

as needed.

1310

as needed.

1307

"""

1311

"""

1308

return hash(text, p1, p2)

1312

return hash(text, p1, p2)

1309

1313

1310

def _processflags(self, text, flags, operation, raw=False):

1314

def _processflags(self, text, flags, operation, raw=False):

1311

"""Inspect revision data flags and applies transforms defined by

1315

"""Inspect revision data flags and applies transforms defined by

1312

registered flag processors.

1316

registered flag processors.

1313

1317

1314

``text`` - the revision data to process

1318

``text`` - the revision data to process

1315

``flags`` - the revision flags

1319

``flags`` - the revision flags

1316

``operation`` - the operation being performed (read or write)

1320

``operation`` - the operation being performed (read or write)

1317

``raw`` - an optional argument describing if the raw transform should be

1321

``raw`` - an optional argument describing if the raw transform should be

1318

applied.

1322

applied.

1319

1323

1320

This method processes the flags in the order (or reverse order if

1324

This method processes the flags in the order (or reverse order if

1321

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1325

``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the

1322

flag processors registered for present flags. The order of flags defined

1326

flag processors registered for present flags. The order of flags defined

1323

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1327

in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

1324

1328

1325

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1329

Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the

1326

processed text and ``validatehash`` is a bool indicating whether the

1330

processed text and ``validatehash`` is a bool indicating whether the

1327

returned text should be checked for hash integrity.

1331

returned text should be checked for hash integrity.

1328

1332

1329

Note: If the ``raw`` argument is set, it has precedence over the

1333

Note: If the ``raw`` argument is set, it has precedence over the

1330

operation and will only update the value of ``validatehash``.

1334

operation and will only update the value of ``validatehash``.

1331

"""

1335

"""

1332

if not operation in ('read', 'write'):

1336

if not operation in ('read', 'write'):

1333

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1337

raise ProgrammingError(_("invalid '%s' operation ") % (operation))

1334

# Check all flags are known.

1338

# Check all flags are known.

1335

if flags & ~REVIDX_KNOWN_FLAGS:

1339

if flags & ~REVIDX_KNOWN_FLAGS:

1336

raise RevlogError(_("incompatible revision flag '%#x'") %

1340

raise RevlogError(_("incompatible revision flag '%#x'") %

1337

(flags & ~REVIDX_KNOWN_FLAGS))

1341

(flags & ~REVIDX_KNOWN_FLAGS))

1338

validatehash = True

1342

validatehash = True

1339

# Depending on the operation (read or write), the order might be

1343

# Depending on the operation (read or write), the order might be

1340

# reversed due to non-commutative transforms.

1344

# reversed due to non-commutative transforms.

1341

orderedflags = REVIDX_FLAGS_ORDER

1345

orderedflags = REVIDX_FLAGS_ORDER

1342

if operation == 'write':

1346

if operation == 'write':

1343

orderedflags = reversed(orderedflags)

1347

orderedflags = reversed(orderedflags)

1344

1348

1345

for flag in orderedflags:

1349

for flag in orderedflags:

1346

# If a flagprocessor has been registered for a known flag, apply the

1350

# If a flagprocessor has been registered for a known flag, apply the

1347

# related operation transform and update result tuple.

1351

# related operation transform and update result tuple.

1348

if flag & flags:

1352

if flag & flags:

1349

vhash = True

1353

vhash = True

1350

1354

1351

if flag not in _flagprocessors:

1355

if flag not in _flagprocessors:

1352

message = _("missing processor for flag '%#x'") % (flag)

1356

message = _("missing processor for flag '%#x'") % (flag)

1353

raise RevlogError(message)

1357

raise RevlogError(message)

1354

1358

1355

processor = _flagprocessors[flag]

1359

processor = _flagprocessors[flag]

1356

if processor is not None:

1360

if processor is not None:

1357

readtransform, writetransform, rawtransform = processor

1361

readtransform, writetransform, rawtransform = processor

1358

1362

1359

if raw:

1363

if raw:

1360

vhash = rawtransform(self, text)

1364

vhash = rawtransform(self, text)

1361

elif operation == 'read':

1365

elif operation == 'read':

1362

text, vhash = readtransform(self, text)

1366

text, vhash = readtransform(self, text)

1363

else: # write operation

1367

else: # write operation

1364

text, vhash = writetransform(self, text)

1368

text, vhash = writetransform(self, text)

1365

validatehash = validatehash and vhash

1369

validatehash = validatehash and vhash

1366

1370

1367

return text, validatehash

1371

return text, validatehash

1368

1372

1369

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1373

def checkhash(self, text, node, p1=None, p2=None, rev=None):

1370

"""Check node hash integrity.

1374

"""Check node hash integrity.

1371

1375

1372

Available as a function so that subclasses can extend hash mismatch

1376

Available as a function so that subclasses can extend hash mismatch

1373

behaviors as needed.

1377

behaviors as needed.

1374

"""

1378

"""

1375

if p1 is None and p2 is None:

1379

if p1 is None and p2 is None:

1376

p1, p2 = self.parents(node)

1380

p1, p2 = self.parents(node)

1377

if node != self.hash(text, p1, p2):

1381

if node != self.hash(text, p1, p2):

1378

revornode = rev

1382

revornode = rev

1379

if revornode is None:

1383

if revornode is None:

1380

revornode = templatefilters.short(hex(node))

1384

revornode = templatefilters.short(hex(node))

1381

raise RevlogError(_("integrity check failed on %s:%s")

1385

raise RevlogError(_("integrity check failed on %s:%s")

1382

% (self.indexfile, revornode))

1386

% (self.indexfile, revornode))

1383

1387

1384

def checkinlinesize(self, tr, fp=None):

1388

def checkinlinesize(self, tr, fp=None):

1385

"""Check if the revlog is too big for inline and convert if so.

1389

"""Check if the revlog is too big for inline and convert if so.

1386

1390

1387

This should be called after revisions are added to the revlog. If the

1391

This should be called after revisions are added to the revlog. If the

1388

revlog has grown too large to be an inline revlog, it will convert it

1392

revlog has grown too large to be an inline revlog, it will convert it

1389

to use multiple index and data files.

1393

to use multiple index and data files.

1390

"""

1394

"""

1391

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1395

if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:

1392

return

1396

return

1393

1397

1394

trinfo = tr.find(self.indexfile)

1398

trinfo = tr.find(self.indexfile)

1395

if trinfo is None:

1399

if trinfo is None:

1396

raise RevlogError(_("%s not found in the transaction")

1400

raise RevlogError(_("%s not found in the transaction")

1397

% self.indexfile)

1401

% self.indexfile)

1398

1402

1399

trindex = trinfo[2]

1403

trindex = trinfo[2]

1400

if trindex is not None:

1404

if trindex is not None:

1401

dataoff = self.start(trindex)

1405

dataoff = self.start(trindex)

1402

else:

1406

else:

1403

# revlog was stripped at start of transaction, use all leftover data

1407

# revlog was stripped at start of transaction, use all leftover data

1404

trindex = len(self) - 1

1408

trindex = len(self) - 1

1405

dataoff = self.end(-2)

1409

dataoff = self.end(-2)

1406

1410

1407

tr.add(self.datafile, dataoff)

1411

tr.add(self.datafile, dataoff)

1408

1412

1409

if fp:

1413

if fp:

1410

fp.flush()

1414

fp.flush()

1411

fp.close()

1415

fp.close()

1412

1416

1413

df = self.opener(self.datafile, 'w')

1417

df = self.opener(self.datafile, 'w')

1414

try:

1418

try:

1415

for r in self:

1419

for r in self:

1416

df.write(self._chunkraw(r, r)[1])

1420

df.write(self._chunkraw(r, r)[1])

1417

finally:

1421

finally:

1418

df.close()

1422

df.close()

1419

1423

1420

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1424

fp = self.opener(self.indexfile, 'w', atomictemp=True,

1421

checkambig=self._checkambig)

1425

checkambig=self._checkambig)

1422

self.version &= ~(REVLOGNGINLINEDATA)

1426

self.version &= ~(REVLOGNGINLINEDATA)

1423

self._inline = False

1427

self._inline = False

1424

for i in self:

1428

for i in self:

1425

e = self._io.packentry(self.index[i], self.node, self.version, i)

1429

e = self._io.packentry(self.index[i], self.node, self.version, i)

1426

fp.write(e)

1430

fp.write(e)

1427

1431

1428

# if we don't call close, the temp file will never replace the

1432

# if we don't call close, the temp file will never replace the

1429

# real index

1433

# real index

1430

fp.close()

1434

fp.close()

1431

1435

1432

tr.replace(self.indexfile, trindex * self._io.size)

1436

tr.replace(self.indexfile, trindex * self._io.size)

1433

self._chunkclear()

1437

self._chunkclear()

1434

1438

1435

def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision

    Returns the node of the revision. If the node is already present in
    the nodemap it is returned without writing anything.

    Raises RevlogError if ``link`` is nullrev or if the processed text is
    larger than ``_maxentrysize``.
    """
    if link == nullrev:
        raise RevlogError(_("attempted to add linkrev -1 to %s")
                          % self.indexfile)

    if flags:
        # with flags set, the default node is derived from the text as
        # given by the caller, before the 'write' flag processing below
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    if len(rawtext) > _maxentrysize:
        raise RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, len(rawtext)))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    # Both handles are opened inside the try block so that a failure while
    # opening the index file cannot leak an already opened data file.
    dfh = None
    ifh = None
    try:
        if not self._inline:
            dfh = self.opener(self.datafile, "a+")
        ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, ifh, dfh)
    finally:
        if dfh:
            dfh.close()
        if ifh is not None:
            ifh.close()

1486

1490

1487

def compress(self, data):
    """Return a ``(header, payload)`` pair representing ``data``.

    The header is ``'u'`` only when the payload is the unmodified input and
    that input does not start with a NUL byte; in every other case the
    header is empty.
    """
    if data:
        packed = self._compressor.compress(data)
        if packed:
            # The revlog compressor added the header in the returned data.
            return '', packed

        # The compressor returned nothing usable: store the data as is.
        # Data starting with '\0' needs no extra header.
        if data[0:1] != '\0':
            return 'u', data

    return '', data

1501

1505

1502

def decompress(self, data):
    """Decompress a revlog chunk.

    The first byte of the chunk is a header identifying the format type,
    which decides how the chunk is decoded. Empty chunks are returned
    unchanged.

    Raises RevlogError on a zlib failure or an unknown header byte.
    """
    if not data:
        return data

    # Revlogs are read much more frequently than they are written and many
    # chunks only take microseconds to decompress, so performance is
    # important here.
    #
    # We can make a few assumptions about revlogs:
    #
    # 1) the majority of chunks will be compressed (as opposed to inline
    #    raw data).
    # 2) decompressing *any* data will likely be at least 10x slower than
    #    returning raw inline data.
    # 3) we want to prioritize common and officially supported compression
    #    engines
    #
    # It follows that we want to optimize for "decompress compressed data
    # when encoded with common and officially supported compression engines"
    # case over "raw data" and "data encoded by less common or non-official
    # compression engines." That is why we have the inline lookup first
    # followed by the compengines lookup.
    #
    # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
    # compressed chunks. And this matters for changelog and manifest reads.
    header = data[0:1]

    if header == 'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise RevlogError(_('revlog decompress error: %s') % str(e))

    # '\0' is more common than 'u' so it goes first.
    if header == '\0':
        return data

    if header == 'u':
        # skip the one-byte marker
        return util.buffer(data, 1)

    # Any other header: use the per-revlog decompressor cache, filling it
    # from the registered compression engines on a miss.
    try:
        decompressor = self._decompressors[header]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(header)
            decompressor = engine.revlogcompressor()
            self._decompressors[header] = decompressor
        except KeyError:
            raise RevlogError(_('unknown compression type %r') % header)

    return decompressor.decompress(data)

1556

1560

1557

def _isgooddelta(self, d, textlen):

1561

def _isgooddelta(self, d, textlen):

1558

"""Returns True if the given delta is good. Good means that it is within

1562

"""Returns True if the given delta is good. Good means that it is within

1559

the disk span, disk size, and chain length bounds that we know to be

1563

the disk span, disk size, and chain length bounds that we know to be

1560

performant."""

1564

performant."""

1561

if d is None:

1565

if d is None:

1562

return False

1566

return False

1563

1567

1564

# - 'dist' is the distance from the base revision -- bounding it limits

1568

# - 'dist' is the distance from the base revision -- bounding it limits

1565

# the amount of I/O we need to do.

1569

# the amount of I/O we need to do.

1566

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1570

# - 'compresseddeltalen' is the sum of the total size of deltas we need

1567

# to apply -- bounding it limits the amount of CPU we consume.

1571

# to apply -- bounding it limits the amount of CPU we consume.

1568

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1572

dist, l, data, base, chainbase, chainlen, compresseddeltalen = d

1569

if (dist > textlen * 4 or l > textlen or

1573

if (dist > textlen * 4 or l > textlen or

1570

compresseddeltalen > textlen * 2 or

1574

compresseddeltalen > textlen * 2 or

1571

(self._maxchainlen and chainlen > self._maxchainlen)):

1575

(self._maxchainlen and chainlen > self._maxchainlen)):

1572

return False

1576

return False

1573

1577

1574

return True

1578

return True

1575

1579

1576

def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                 cachedelta, ifh, dfh, alwayscache=False, raw=False):
    """internal function to add revisions to the log

    see addrevision for argument descriptions.
    invariants:
    - text is optional (can be None); if not set, cachedelta must be set.
      if both are set, they must correspond to each other.
    - raw is optional; if set to True, it indicates the revision data is to
      be treated by _processflags() as raw. It is usually set by changegroup
      generation and debug commands.

    ifh and dfh are the open index and data file handles (dfh may be None).
    When alwayscache is true the fulltext is built, if needed, so that it
    can be stored in the revision cache. Returns ``node``.
    """
    # one-element list so the nested helpers can share and memoize the
    # fulltext once it has been computed
    fulltext = [text]

    def buildtext():
        """Return the fulltext, rebuilding it from cachedelta if needed."""
        if fulltext[0] is not None:
            return fulltext[0]
        baserev, delta = cachedelta[0], cachedelta[1]
        # special case deltas which replace entire base; no need to decode
        # base revision. this neatly avoids censored bases, which throw when
        # they're decoded.
        hlen = struct.calcsize(">lll")
        if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                   len(delta) - hlen):
            fulltext[0] = delta[hlen:]
        else:
            # pick the handle at call time: self._inline may have changed
            fh = ifh if self._inline else dfh
            basetext = self.revision(baserev, _df=fh, raw=raw)
            fulltext[0] = mdiff.patch(basetext, delta)

        try:
            fulltext[0], validatehash = self._processflags(
                fulltext[0], flags, 'read', raw=raw)
            if validatehash:
                self.checkhash(fulltext[0], node, p1=p1, p2=p2)
            if flags & REVIDX_ISCENSORED:
                raise RevlogError(_('node %s is not censored') % node)
        except CensoredNodeError:
            # must pass the censored index flag to add censored revisions
            if not flags & REVIDX_ISCENSORED:
                raise
        return fulltext[0]

    def builddelta(rev):
        """Build the candidate delta tuple against revision ``rev``."""
        # can we use the cached delta?
        if cachedelta and cachedelta[0] == rev:
            rawdelta = cachedelta[1]
        else:
            newtext = buildtext()
            if self.iscensored(rev):
                # deltas based on a censored revision must replace the
                # full content in one patch, so delta works everywhere
                rawdelta = (mdiff.replacediffheader(self.rawsize(rev),
                                                    len(newtext))
                            + newtext)
            else:
                fh = ifh if self._inline else dfh
                parenttext = self.revision(rev, _df=fh, raw=True)
                rawdelta = mdiff.textdiff(parenttext, newtext)
        header, data = self.compress(rawdelta)
        deltalen = len(header) + len(data)
        chainbase = self.chainbase(rev)
        dist = deltalen + offset - self.start(chainbase)
        base = rev if self._generaldelta else chainbase
        chainlen, compresseddeltalen = self._chaininfo(rev)
        return (dist, deltalen, (header, data), base,
                chainbase, chainlen + 1, compresseddeltalen + deltalen)

    curr = len(self)
    prev = curr - 1
    offset = self.end(prev)
    delta = None
    p1r, p2r = self.rev(p1), self.rev(p2)

    # full versions are inserted when the needed deltas
    # become comparable to the uncompressed text
    if text is None:
        textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                    cachedelta[1])
    else:
        textlen = len(text)

    # should we try to build a delta?
    if prev != nullrev and self.storedeltachains:
        tested = set()
        # This condition is true most of the time when processing
        # changegroup data into a generaldelta repo. The only time it
        # isn't true is if this is the first revision in a delta chain
        # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
        if cachedelta and self._generaldelta and self._lazydeltabase:
            # Assume what we received from the server is a good choice
            # build delta will reuse the cache
            candidate = builddelta(cachedelta[0])
            tested.add(cachedelta[0])
            if self._isgooddelta(candidate, textlen):
                delta = candidate
        if delta is None and self._generaldelta:
            # exclude already lazy tested base if any
            parents = [p for p in (p1r, p2r)
                       if p != nullrev and p not in tested]
            if parents and not self._aggressivemergedeltas:
                # Pick whichever parent is closer to us (to minimize the
                # chance of having to build a fulltext).
                parents = [max(parents)]
            tested.update(parents)
            # the generator keeps build/check interleaved per parent
            candidates = (builddelta(p) for p in parents)
            gooddeltas = [c for c in candidates
                          if self._isgooddelta(c, textlen)]
            if gooddeltas:
                delta = min(gooddeltas, key=lambda x: x[1])
        if delta is None and prev not in tested:
            # other approach failed try against prev to hopefully save us a
            # fulltext.
            candidate = builddelta(prev)
            if self._isgooddelta(candidate, textlen):
                delta = candidate

    if delta is None:
        # no acceptable delta: store the full (compressed) text
        text = buildtext()
        data = self.compress(text)
        chunklen = len(data[1]) + len(data[0])
        base = chainbase = curr
    else:
        (_dist, chunklen, data, base,
         chainbase, _chainlen, _chainsize) = delta

    indexentry = (offset_type(offset, flags), chunklen, textlen,
                  base, link, p1r, p2r, node)
    self.index.insert(-1, indexentry)
    self.nodemap[node] = curr

    packed = self._io.packentry(indexentry, self.node, self.version, curr)
    self._writeentry(transaction, ifh, dfh, packed, data, link, offset)

    if alwayscache and text is None:
        text = buildtext()

    if type(text) == str: # only accept immutable objects
        self._cache = (node, curr, text)
    self._chainbasecache[curr] = chainbase
    return node

1727

1731

1728

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1732

def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):

1729

# Files opened in a+ mode have inconsistent behavior on various

1733

# Files opened in a+ mode have inconsistent behavior on various

1730

# platforms. Windows requires that a file positioning call be made

1734

# platforms. Windows requires that a file positioning call be made

1731

# when the file handle transitions between reads and writes. See

1735

# when the file handle transitions between reads and writes. See

1732

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1736

# 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other

1733

# platforms, Python or the platform itself can be buggy. Some versions

1737

# platforms, Python or the platform itself can be buggy. Some versions

1734

# of Solaris have been observed to not append at the end of the file

1738

# of Solaris have been observed to not append at the end of the file

1735

# if the file was seeked to before the end. See issue4943 for more.

1739

# if the file was seeked to before the end. See issue4943 for more.

1736

#

1740

#

1737

# We work around this issue by inserting a seek() before writing.

1741

# We work around this issue by inserting a seek() before writing.

1738

# Note: This is likely not necessary on Python 3.

1742

# Note: This is likely not necessary on Python 3.

1739

ifh.seek(0, os.SEEK_END)

1743

ifh.seek(0, os.SEEK_END)

1740

if dfh:

1744

if dfh:

1741

dfh.seek(0, os.SEEK_END)

1745

dfh.seek(0, os.SEEK_END)

1742

1746

1743

curr = len(self) - 1

1747

curr = len(self) - 1

1744

if not self._inline:

1748

if not self._inline:

1745

transaction.add(self.datafile, offset)

1749

transaction.add(self.datafile, offset)

1746

transaction.add(self.indexfile, curr * len(entry))

1750

transaction.add(self.indexfile, curr * len(entry))

1747

if data[0]:

1751

if data[0]:

1748

dfh.write(data[0])

1752

dfh.write(data[0])

1749

dfh.write(data[1])

1753

dfh.write(data[1])

1750

ifh.write(entry)

1754

ifh.write(entry)

1751

else:

1755

else:

1752

offset += curr * self._io.size

1756

offset += curr * self._io.size

1753

transaction.add(self.indexfile, offset, curr)

1757

transaction.add(self.indexfile, offset, curr)

1754

ifh.write(entry)

1758

ifh.write(entry)

1755

ifh.write(data[0])

1759

ifh.write(data[0])

1756

ifh.write(data[1])

1760

ifh.write(data[1])

1757

self.checkinlinesize(transaction, ifh)

1761

self.checkinlinesize(transaction, ifh)

1758

1762

1759

def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
    """
    add a delta group

    given a set of deltas, add them to the revision log. the
    first delta is against its parent, which should be in our
    log, the rest are against the previous delta.

    ``cg`` provides ``deltachunk(chain)``, which returns a dict describing
    the next delta, or an empty dict when the group is exhausted.
    ``linkmapper`` maps the 'cs' value of a chunk to a linkrev.

    If ``addrevisioncb`` is defined, it will be called with arguments of
    this revlog and the node that was added.

    Returns the list of nodes found in the group, in order, including
    nodes that were already present in this revlog.

    Raises LookupError for an unknown parent or delta base, and
    error.CensoredBaseError when a delta against a censored base is not a
    full replacement.
    """

    # track the base of the current delta log
    content = []

    r = len(self)
    end = 0
    if r:
        end = self.end(r - 1)
    ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
    dfh = None
    # Everything after the index file is opened runs inside the try block
    # so that a failure while registering with the transaction or opening
    # the data file still closes the index handle.
    try:
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a+")

        def flush():
            # reads dfh/ifh at call time, so it follows the reopen below
            if dfh:
                dfh.flush()
            ifh.flush()

        # loop through our set of deltas
        chain = None
        for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
            node = chunkdata['node']
            p1 = chunkdata['p1']
            p2 = chunkdata['p2']
            cs = chunkdata['cs']
            deltabase = chunkdata['deltabase']
            delta = chunkdata['delta']
            flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS

            content.append(node)

            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                chain = node
                continue

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(p, self.indexfile,
                                      _('unknown parent'))

            if deltabase not in self.nodemap:
                raise LookupError(deltabase, self.indexfile,
                                  _('unknown delta base'))

            baserev = self.rev(deltabase)

            if baserev != nullrev and self.iscensored(baserev):
                # if base is censored, delta must be full replacement in a
                # single patch operation
                hlen = struct.calcsize(">lll")
                oldlen = self.rawsize(baserev)
                newlen = len(delta) - hlen
                if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                    raise error.CensoredBaseError(self.indexfile,
                                                  self.node(baserev))

            if not flags and self._peek_iscensored(baserev, delta, flush):
                flags |= REVIDX_ISCENSORED

            # We assume consumers of addrevisioncb will want to retrieve
            # the added revision, which will require a call to
            # revision(). revision() will fast path if there is a cache
            # hit. So, we tell _addrevision() to always cache in this case.
            # We're only using addgroup() in the context of changegroup
            # generation so the revision data can always be handled as raw
            # by the flagprocessor.
            chain = self._addrevision(node, None, transaction, link,
                                      p1, p2, flags, (baserev, delta),
                                      ifh, dfh,
                                      alwayscache=bool(addrevisioncb),
                                      raw=True)

            if addrevisioncb:
                addrevisioncb(self, chain)

            if not dfh and not self._inline:
                # addrevision switched from inline to conventional
                # reopen the index
                ifh.close()
                dfh = self.opener(self.datafile, "a+")
                ifh = self.opener(self.indexfile, "a+",
                                  checkambig=self._checkambig)
    finally:
        if dfh:
            dfh.close()
        ifh.close()

    return content

1865

1869

1866

def iscensored(self, rev):
    """Report whether revision ``rev`` is censored.

    This implementation always answers False, whatever ``rev`` is.
    """
    return False

1869

1873

1870

def _peek_iscensored(self, baserev, delta, flush):

1874

def _peek_iscensored(self, baserev, delta, flush):

1871

"""Quickly check if a delta produces a censored revision."""

1875

"""Quickly check if a delta produces a censored revision."""

1872

return False

1876

return False

1873

1877

1874

def getstrippoint(self, minlink):

1878

def getstrippoint(self, minlink):

1875

"""find the minimum rev that must be stripped to strip the linkrev

1879

"""find the minimum rev that must be stripped to strip the linkrev

1876

1880

1877

Returns a tuple containing the minimum rev and a set of all revs that

1881

Returns a tuple containing the minimum rev and a set of all revs that

1878

have linkrevs that will be broken by this strip.

1882

have linkrevs that will be broken by this strip.

1879

"""

1883

"""

1880

brokenrevs = set()

1884

brokenrevs = set()

1881

strippoint = len(self)

1885

strippoint = len(self)

1882

1886

1883

heads = {}

1887

heads = {}

1884

futurelargelinkrevs = set()

1888

futurelargelinkrevs = set()

1885

for head in self.headrevs():

1889

for head in self.headrevs():

1886

headlinkrev = self.linkrev(head)

1890

headlinkrev = self.linkrev(head)

1887

heads[head] = headlinkrev

1891

heads[head] = headlinkrev

1888

if headlinkrev >= minlink:

1892

if headlinkrev >= minlink:

1889

futurelargelinkrevs.add(headlinkrev)

1893

futurelargelinkrevs.add(headlinkrev)

1890

1894

1891

# This algorithm involves walking down the rev graph, starting at the

1895

# This algorithm involves walking down the rev graph, starting at the

1892

# heads. Since the revs are topologically sorted according to linkrev,

1896

# heads. Since the revs are topologically sorted according to linkrev,

1893

# once all head linkrevs are below the minlink, we know there are

1897

# once all head linkrevs are below the minlink, we know there are

1894

# no more revs that could have a linkrev greater than minlink.

1898

# no more revs that could have a linkrev greater than minlink.

1895

# So we can stop walking.

1899

# So we can stop walking.

1896

while futurelargelinkrevs:

1900

while futurelargelinkrevs:

1897

strippoint -= 1

1901

strippoint -= 1

1898

linkrev = heads.pop(strippoint)

1902

linkrev = heads.pop(strippoint)

1899

1903

1900

if linkrev < minlink:

1904

if linkrev < minlink:

1901

brokenrevs.add(strippoint)

1905

brokenrevs.add(strippoint)

1902

else:

1906

else:

1903

futurelargelinkrevs.remove(linkrev)

1907

futurelargelinkrevs.remove(linkrev)

1904

1908

1905

for p in self.parentrevs(strippoint):

1909

for p in self.parentrevs(strippoint):

1906

if p != nullrev:

1910

if p != nullrev:

1907

plinkrev = self.linkrev(p)

1911

plinkrev = self.linkrev(p)

1908

heads[p] = plinkrev

1912

heads[p] = plinkrev

1909

if plinkrev >= minlink:

1913

if plinkrev >= minlink:

1910

futurelargelinkrevs.add(plinkrev)

1914

futurelargelinkrevs.add(plinkrev)

1911

1915

1912

return strippoint, brokenrevs

1916

return strippoint, brokenrevs

1913

1917

1914

def strip(self, minlink, transaction):

1918

def strip(self, minlink, transaction):

1915

"""truncate the revlog on the first revision with a linkrev >= minlink

1919

"""truncate the revlog on the first revision with a linkrev >= minlink

1916

1920

1917

This function is called when we're stripping revision minlink and

1921

This function is called when we're stripping revision minlink and

1918

its descendants from the repository.

1922

its descendants from the repository.

1919

1923

1920

We have to remove all revisions with linkrev >= minlink, because

1924

We have to remove all revisions with linkrev >= minlink, because

1921

the equivalent changelog revisions will be renumbered after the

1925

the equivalent changelog revisions will be renumbered after the

1922

strip.

1926

strip.

1923

1927

1924

So we truncate the revlog on the first of these revisions, and

1928

So we truncate the revlog on the first of these revisions, and

1925

trust that the caller has saved the revisions that shouldn't be

1929

trust that the caller has saved the revisions that shouldn't be

1926

removed and that it'll re-add them after this truncation.

1930

removed and that it'll re-add them after this truncation.

1927

"""

1931

"""

1928

if len(self) == 0:

1932

if len(self) == 0:

1929

return

1933

return

1930

1934

1931

rev, _ = self.getstrippoint(minlink)

1935

rev, _ = self.getstrippoint(minlink)

1932

if rev == len(self):

1936

if rev == len(self):

1933

return

1937

return

1934

1938

1935

# first truncate the files on disk

1939

# first truncate the files on disk

1936

end = self.start(rev)

1940

end = self.start(rev)

1937

if not self._inline:

1941

if not self._inline:

1938

transaction.add(self.datafile, end)

1942

transaction.add(self.datafile, end)

1939

end = rev * self._io.size

1943

end = rev * self._io.size

1940

else:

1944

else:

1941

end += rev * self._io.size

1945

end += rev * self._io.size

1942

1946

1943

transaction.add(self.indexfile, end)

1947

transaction.add(self.indexfile, end)

1944

1948

1945

# then reset internal state in memory to forget those revisions

1949

# then reset internal state in memory to forget those revisions

1946

self._cache = None

1950

self._cache = None

1947

self._chaininfocache = {}

1951

self._chaininfocache = {}

1948

self._chunkclear()

1952

self._chunkclear()

1949

for x in xrange(rev, len(self)):

1953

for x in xrange(rev, len(self)):

1950

del self.nodemap[self.node(x)]

1954

del self.nodemap[self.node(x)]

1951

1955

1952

del self.index[rev:-1]

1956

del self.index[rev:-1]

1953

1957

1954

def checksize(self):

1958

def checksize(self):

1955

expected = 0

1959

expected = 0

1956

if len(self):

1960

if len(self):

1957

expected = max(0, self.end(len(self) - 1))

1961

expected = max(0, self.end(len(self) - 1))

1958

1962

1959

try:

1963

try:

1960

f = self.opener(self.datafile)

1964

f = self.opener(self.datafile)

1961

f.seek(0, 2)

1965

f.seek(0, 2)

1962

actual = f.tell()

1966

actual = f.tell()

1963

f.close()

1967

f.close()

1964

dd = actual - expected

1968

dd = actual - expected

1965

except IOError as inst:

1969

except IOError as inst:

1966

if inst.errno != errno.ENOENT:

1970

if inst.errno != errno.ENOENT:

1967

raise

1971

raise

1968

dd = 0

1972

dd = 0

1969

1973

1970

try:

1974

try:

1971

f = self.opener(self.indexfile)

1975

f = self.opener(self.indexfile)

1972

f.seek(0, 2)

1976

f.seek(0, 2)

1973

actual = f.tell()

1977

actual = f.tell()

1974

f.close()

1978

f.close()

1975

s = self._io.size

1979

s = self._io.size

1976

i = max(0, actual // s)

1980

i = max(0, actual // s)

1977

di = actual - (i * s)

1981

di = actual - (i * s)

1978

if self._inline:

1982

if self._inline:

1979

databytes = 0

1983

databytes = 0

1980

for r in self:

1984

for r in self:

1981

databytes += max(0, self.length(r))

1985

databytes += max(0, self.length(r))

1982

dd = 0

1986

dd = 0

1983

di = actual - len(self) * s - databytes

1987

di = actual - len(self) * s - databytes

1984

except IOError as inst:

1988

except IOError as inst:

1985

if inst.errno != errno.ENOENT:

1989

if inst.errno != errno.ENOENT:

1986

raise

1990

raise

1987

di = 0

1991

di = 0

1988

1992

1989

return (dd, di)

1993

return (dd, di)

1990

1994

1991

def files(self):

1995

def files(self):

1992

res = [self.indexfile]

1996

res = [self.indexfile]

1993

if not self._inline:

1997

if not self._inline:

1994

res.append(self.datafile)

1998

res.append(self.datafile)

1995

return res

1999

return res

1996

2000

1997

DELTAREUSEALWAYS = 'always'

2001

DELTAREUSEALWAYS = 'always'

1998

DELTAREUSESAMEREVS = 'samerevs'

2002

DELTAREUSESAMEREVS = 'samerevs'

1999

DELTAREUSENEVER = 'never'

2003

DELTAREUSENEVER = 'never'

2000

2004

2001

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

2005

DELTAREUSEALL = set(['always', 'samerevs', 'never'])

2002

2006

2003

def clone(self, tr, destrevlog, addrevisioncb=None,

2007

def clone(self, tr, destrevlog, addrevisioncb=None,

2004

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

2008

deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):

2005

"""Copy this revlog to another, possibly with format changes.

2009

"""Copy this revlog to another, possibly with format changes.

2006

2010

2007

The destination revlog will contain the same revisions and nodes.

2011

The destination revlog will contain the same revisions and nodes.

2008

However, it may not be bit-for-bit identical due to e.g. delta encoding

2012

However, it may not be bit-for-bit identical due to e.g. delta encoding

2009

differences.

2013

differences.

2010

2014

2011

The ``deltareuse`` argument control how deltas from the existing revlog

2015

The ``deltareuse`` argument control how deltas from the existing revlog

2012

are preserved in the destination revlog. The argument can have the

2016

are preserved in the destination revlog. The argument can have the

2013

following values:

2017

following values:

2014

2018

2015

DELTAREUSEALWAYS

2019

DELTAREUSEALWAYS

2016

Deltas will always be reused (if possible), even if the destination

2020

Deltas will always be reused (if possible), even if the destination

2017

revlog would not select the same revisions for the delta. This is the

2021

revlog would not select the same revisions for the delta. This is the

2018

fastest mode of operation.

2022

fastest mode of operation.

2019

DELTAREUSESAMEREVS

2023

DELTAREUSESAMEREVS

2020

Deltas will be reused if the destination revlog would pick the same

2024

Deltas will be reused if the destination revlog would pick the same

2021

revisions for the delta. This mode strikes a balance between speed

2025

revisions for the delta. This mode strikes a balance between speed

2022

and optimization.

2026

and optimization.

2023

DELTAREUSENEVER

2027

DELTAREUSENEVER

2024

Deltas will never be reused. This is the slowest mode of execution.

2028

Deltas will never be reused. This is the slowest mode of execution.

2025

This mode can be used to recompute deltas (e.g. if the diff/delta

2029

This mode can be used to recompute deltas (e.g. if the diff/delta

2026

algorithm changes).

2030

algorithm changes).

2027

2031

2028

Delta computation can be slow, so the choice of delta reuse policy can

2032

Delta computation can be slow, so the choice of delta reuse policy can

2029

significantly affect run time.

2033

significantly affect run time.

2030

2034

2031

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2035

The default policy (``DELTAREUSESAMEREVS``) strikes a balance between

2032

two extremes. Deltas will be reused if they are appropriate. But if the

2036

two extremes. Deltas will be reused if they are appropriate. But if the

2033

delta could choose a better revision, it will do so. This means if you

2037

delta could choose a better revision, it will do so. This means if you

2034

are converting a non-generaldelta revlog to a generaldelta revlog,

2038

are converting a non-generaldelta revlog to a generaldelta revlog,

2035

deltas will be recomputed if the delta's parent isn't a parent of the

2039

deltas will be recomputed if the delta's parent isn't a parent of the

2036

revision.

2040

revision.

2037

2041

2038

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2042

In addition to the delta policy, the ``aggressivemergedeltas`` argument

2039

controls whether to compute deltas against both parents for merges.

2043

controls whether to compute deltas against both parents for merges.

2040

By default, the current default is used.

2044

By default, the current default is used.

2041

"""

2045

"""

2042

if deltareuse not in self.DELTAREUSEALL:

2046

if deltareuse not in self.DELTAREUSEALL:

2043

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2047

raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

2044

2048

2045

if len(destrevlog):

2049

if len(destrevlog):

2046

raise ValueError(_('destination revlog is not empty'))

2050

raise ValueError(_('destination revlog is not empty'))

2047

2051

2048

if getattr(self, 'filteredrevs', None):

2052

if getattr(self, 'filteredrevs', None):

2049

raise ValueError(_('source revlog has filtered revisions'))

2053

raise ValueError(_('source revlog has filtered revisions'))

2050

if getattr(destrevlog, 'filteredrevs', None):

2054

if getattr(destrevlog, 'filteredrevs', None):

2051

raise ValueError(_('destination revlog has filtered revisions'))

2055

raise ValueError(_('destination revlog has filtered revisions'))

2052

2056

2053

# lazydeltabase controls whether to reuse a cached delta, if possible.

2057

# lazydeltabase controls whether to reuse a cached delta, if possible.

2054

oldlazydeltabase = destrevlog._lazydeltabase

2058

oldlazydeltabase = destrevlog._lazydeltabase

2055

oldamd = destrevlog._aggressivemergedeltas

2059

oldamd = destrevlog._aggressivemergedeltas

2056

2060

2057

try:

2061

try:

2058

if deltareuse == self.DELTAREUSEALWAYS:

2062

if deltareuse == self.DELTAREUSEALWAYS:

2059

destrevlog._lazydeltabase = True

2063

destrevlog._lazydeltabase = True

2060

elif deltareuse == self.DELTAREUSESAMEREVS:

2064

elif deltareuse == self.DELTAREUSESAMEREVS:

2061

destrevlog._lazydeltabase = False

2065

destrevlog._lazydeltabase = False

2062

2066

2063

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2067

destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

2064

2068

2065

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2069

populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,

2066

self.DELTAREUSESAMEREVS)

2070

self.DELTAREUSESAMEREVS)

2067

2071

2068

index = self.index

2072

index = self.index

2069

for rev in self:

2073

for rev in self:

2070

entry = index[rev]

2074

entry = index[rev]

2071

2075

2072

# Some classes override linkrev to take filtered revs into

2076

# Some classes override linkrev to take filtered revs into

2073

# account. Use raw entry from index.

2077

# account. Use raw entry from index.

2074

flags = entry[0] & 0xffff

2078

flags = entry[0] & 0xffff

2075

linkrev = entry[4]

2079

linkrev = entry[4]

2076

p1 = index[entry[5]][7]

2080

p1 = index[entry[5]][7]

2077

p2 = index[entry[6]][7]

2081

p2 = index[entry[6]][7]

2078

node = entry[7]

2082

node = entry[7]

2079

2083

2080

# (Possibly) reuse the delta from the revlog if allowed and

2084

# (Possibly) reuse the delta from the revlog if allowed and

2081

# the revlog chunk is a delta.

2085

# the revlog chunk is a delta.

2082

cachedelta = None

2086

cachedelta = None

2083

text = None

2087

text = None

2084

if populatecachedelta:

2088

if populatecachedelta:

2085

dp = self.deltaparent(rev)

2089

dp = self.deltaparent(rev)

2086

if dp != nullrev:

2090

if dp != nullrev:

2087

cachedelta = (dp, str(self._chunk(rev)))

2091

cachedelta = (dp, str(self._chunk(rev)))

2088

2092

2089

if not cachedelta:

2093

if not cachedelta:

2090

text = self.revision(rev)

2094

text = self.revision(rev)

2091

2095

2092

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2096

ifh = destrevlog.opener(destrevlog.indexfile, 'a+',

2093

checkambig=False)

2097

checkambig=False)

2094

dfh = None

2098

dfh = None

2095

if not destrevlog._inline:

2099

if not destrevlog._inline:

2096

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2100

dfh = destrevlog.opener(destrevlog.datafile, 'a+')

2097

try:

2101

try:

2098

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2102

destrevlog._addrevision(node, text, tr, linkrev, p1, p2,

2099

flags, cachedelta, ifh, dfh)

2103

flags, cachedelta, ifh, dfh)

2100

finally:

2104

finally:

2101

if dfh:

2105

if dfh:

2102

dfh.close()

2106

dfh.close()

2103

ifh.close()

2107

ifh.close()

2104

2108

2105

if addrevisioncb:

2109

if addrevisioncb:

2106

addrevisioncb(self, rev, node)

2110

addrevisioncb(self, rev, node)

2107

finally:

2111

finally:

2108

destrevlog._lazydeltabase = oldlazydeltabase

2112

destrevlog._lazydeltabase = oldlazydeltabase

2109

destrevlog._aggressivemergedeltas = oldamd

2113

destrevlog._aggressivemergedeltas = oldamd

             local test passed
+            addgroupcopy test passed
             abort: crashed: invalid patch

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages

             # revlog.py - storage back-end for mercurial
             #
             # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import collections
             import errno
             import hashlib
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullid,
                 nullrev,
             )
             from .i18n import _
             from . import (
                 ancestor,
                 error,
                 mdiff,
                 parsers,
                 pycompat,
                 templatefilters,
                 util,
             )
             # Module-level aliases for the struct packing helpers.
             _pack = struct.pack
             _unpack = struct.unpack
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # revlog header flags
             #
             # The version word read from the start of an index file is split by
             # revlog.__init__ into a format number (low 16 bits) and feature flags
             # (the remaining high bits).
             REVLOGV0 = 0
             REVLOGNG = 1
             REVLOGNGINLINEDATA = (1 << 16)
             REVLOGGENERALDELTA = (1 << 17)
             REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
             REVLOG_DEFAULT_FORMAT = REVLOGNG
             REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
             # every header flag that is accepted for the REVLOGNG format
             REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGGENERALDELTA
             # revlog index flags
             REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
             REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
             REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
             REVIDX_DEFAULT_FLAGS = 0
             # stable order in which flags need to be processed and their processors applied
             REVIDX_FLAGS_ORDER = [
                 REVIDX_ISCENSORED,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
             ]
             # derived from REVIDX_FLAGS_ORDER; offset_type() rejects any flag bit
             # outside this value
             REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
             # max size of revlog with inline data
             _maxinline = 131072
             # 1048576 bytes == 1 MiB
             _chunksize = 1048576
             # Exception classes from the error module, exposed under this module's
             # namespace.
             RevlogError = error.RevlogError
             LookupError = error.LookupError
             CensoredNodeError = error.CensoredNodeError
             ProgrammingError = error.ProgrammingError
             # Store flag processors (cf. 'addflagprocessor()' to register)
             #
             # REVIDX_ISCENSORED is pre-populated with None, so addflagprocessor()
             # refuses to register a processor for it (the flag is already a key).
             _flagprocessors = {
                 REVIDX_ISCENSORED: None,
             }
             def addflagprocessor(flag, processor):
                 """Install ``processor`` as the handler for revision flag ``flag``.

                 Rules enforced here:

                 - ``flag`` must intersect REVIDX_KNOWN_FLAGS and must be listed in
                   REVIDX_FLAGS_ORDER, otherwise ProgrammingError is raised.
                 - A flag can carry at most one processor; registering a second one
                   raises error.Abort.

                 ``processor`` is expected to be a 3-tuple of functions
                 ``(read, write, raw)`` with these signatures:

                     - (read)  f(self, rawtext) -> text, bool
                     - (write) f(self, text) -> rawtext, bool
                     - (raw)   f(self, rawtext) -> bool

                 "text" is what the user sees; "rawtext" is what is stored in the
                 revlog data and is not directly visible to the user.

                 The boolean returned by each transform tells whether the returned
                 text can be used for hash integrity checking. For example, when
                 "write" returns False, "text" is used to generate the hash; when it
                 returns True, the "rawtext" it produced should be hashed instead.
                 Usually "write" and "read" return different booleans, and "raw"
                 returns the same boolean as "write".

                 Note: the 'raw' transform is used for changegroup generation and in
                 some debug commands. In that case it only indicates whether the
                 contents can be used for hash integrity checks.
                 """
                 known = flag & REVIDX_KNOWN_FLAGS
                 if not known:
                     raise ProgrammingError(
                         _("cannot register processor on unknown flag '%#x'.") % (flag))

                 ordered = flag in REVIDX_FLAGS_ORDER
                 if not ordered:
                     raise ProgrammingError(
                         _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag))

                 # one processor per flag: refuse to overwrite an existing entry
                 if flag in _flagprocessors:
                     raise error.Abort(
                         _("cannot register multiple processors on flag '%#x'.")
                         % (flag))

                 _flagprocessors[flag] = processor
             def getoffset(q):
                 """Return the offset part of a packed offset/flags value.

                 The offset is everything above the low 16 bits of ``q``.
                 """
                 shifted = q >> 16
                 return int(shifted)
             def gettype(q):
                 """Return the flags part of a packed offset/flags value.

                 The flags are the low 16 bits of ``q``.
                 """
                 lowbits = q & 0xFFFF
                 return int(lowbits)
             def offset_type(offset, type):
                 """Pack ``offset`` and the index flags ``type`` into one integer.

                 The offset is shifted above the low 16 bits and the flags are OR-ed
                 into the result. Raises ValueError when ``type`` has a bit set that
                 is not part of REVIDX_KNOWN_FLAGS.
                 """
                 unknownbits = type & ~REVIDX_KNOWN_FLAGS
                 if unknownbits:
                     raise ValueError('unknown revlog index flags')
                 packed = int(offset) << 16 | type
                 return int(packed)
             _nullhash = hashlib.sha1(nullid)


             def hash(text, p1, p2):
                 """Return the SHA-1 digest identifying ``text`` with parents p1/p2.

                 The digest covers the two parent hashes followed by the text, so the
                 same content with different parents yields a different node.
                 """
                 if p2 == nullid:
                     # As of now, if one of the parent nodes is null, p2 is null.
                     # Copying the pre-seeded null hash is faster than creating one.
                     hasher = _nullhash.copy()
                     hasher.update(p1)
                 else:
                     # neither parent is nullid: feed them in sorted order
                     first, second = sorted([p1, p2])
                     hasher = hashlib.sha1(first)
                     hasher.update(second)
                 hasher.update(text)
                 return hasher.digest()
             # index v0:
             #  4 bytes: offset
             #  4 bytes: compressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             # 20 bytes: parent 1 nodeid
             # 20 bytes: parent 2 nodeid
             # 20 bytes: nodeid
             indexformatv0 = ">4l20s20s20s"


             class revlogoldio(object):
                 """Parse and pack index entries in the version 0 on-disk layout."""

                 def __init__(self):
                     # size in bytes of one packed v0 index record
                     self.size = struct.calcsize(indexformatv0)

                 def parseindex(self, data, inline):
                     """Decode raw v0 index ``data``.

                     Returns ``(index, nodemap, None)``: ``index`` is a list of
                     8-tuples laid out like the newer index entries, ending with a
                     null-revision sentinel; ``nodemap`` maps nodeid to revision
                     number. ``inline`` is not used by this implementation.
                     """
                     entrysize = self.size
                     total = len(data)
                     index = []
                     nodemap = {nullid: nullrev}
                     pos = 0
                     # only whole records are consumed; trailing bytes are ignored
                     while pos + entrysize <= total:
                         record = data[pos:pos + entrysize]
                         pos += entrysize
                         (offset, complen, baserev, linkrev,
                          p1node, p2node, nodeid) = _unpack(indexformatv0, record)
                         # transform to revlogv1 format: parents become revision
                         # numbers and the third field (absent in v0) is set to -1
                         rev = len(index)
                         index.append((offset_type(offset, 0), complen, -1,
                                       baserev, linkrev,
                                       nodemap.get(p1node, nullrev),
                                       nodemap.get(p2node, nullrev), nodeid))
                         nodemap[nodeid] = rev
                     # add the magic null revision at -1
                     index.append((0, 0, 0, -1, -1, -1, -1, nullid))
                     return index, nodemap, None

                 def packentry(self, entry, node, version, rev):
                     """Serialize one index ``entry`` in the v0 layout.

                     ``node`` is a callable turning a parent revision number into its
                     nodeid. Raises RevlogError when the entry carries index flags,
                     which v0 cannot store. ``version`` and ``rev`` are not used.
                     """
                     offset_flags = entry[0]
                     if gettype(offset_flags):
                         raise RevlogError(_("index entry flags need RevlogNG"))
                     fields = (getoffset(offset_flags), entry[1], entry[3],
                               entry[4], node(entry[5]), node(entry[6]), entry[7])
                     return _pack(indexformatv0, *fields)
             # index ng:
             #  6 bytes: offset
             #  2 bytes: flags
             #  4 bytes: compressed length
             #  4 bytes: uncompressed length
             #  4 bytes: base rev
             #  4 bytes: link rev
             #  4 bytes: parent 1 rev
             #  4 bytes: parent 2 rev
             # 32 bytes: nodeid
             indexformatng = ">Qiiiiii20s12x"
             versionformat = ">I"
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7fffffff


             class revlogio(object):
                 """Parse and pack index entries in the "ng" on-disk layout."""

                 def __init__(self):
                     # size in bytes of one packed ng index record
                     self.size = struct.calcsize(indexformatng)

                 def parseindex(self, data, inline):
                     """Decode raw index ``data`` via ``parsers.parse_index2``.

                     Returns ``(index, nodemap, cache)`` where ``nodemap`` is the
                     index object's ``nodemap`` attribute, or None if it has none.
                     """
                     # call the C implementation to parse the index data
                     parsed = parsers.parse_index2(data, inline)
                     index, cache = parsed
                     nodemap = getattr(index, 'nodemap', None)
                     return index, nodemap, cache

                 def packentry(self, entry, node, version, rev):
                     """Serialize one index ``entry`` in the ng layout.

                     For revision 0 the first four bytes of the packed record are
                     replaced by the packed ``version`` word. ``node`` is not used.
                     """
                     packed = _pack(indexformatng, *entry)
                     if rev != 0:
                         return packed
                     header = _pack(versionformat, version)
                     return header + packed[4:]
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 """
                 def __init__(self, opener, indexfile, checkambig=False):
                     """
                     create a revlog object

                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like. If it
                     has an ``options`` attribute, that mapping is consulted for the
                     keys 'revlogv1', 'generaldelta', 'chunkcachesize',
                     'maxchainlen', 'aggressivemergedeltas', 'lazydeltabase' and
                     'compengine'.

                     indexfile is the name of the index file; the data file name is
                     derived from it by replacing its last two characters with ".d".

                     checkambig is stored as ``_checkambig``; when True, indexfile is
                     opened with checkambig=True at writing, to avoid file stat
                     ambiguity.

                     Raises RevlogError when the chunk cache size is not a positive
                     power of 2, when the index header carries an unknown format or
                     unknown flags, or when the index data cannot be parsed. IOError
                     from reading the index is propagated unless it is ENOENT (a
                     missing index file is treated as an empty revlog).
                     """
                     self.indexfile = indexfile
                     self.datafile = indexfile[:-2] + ".d"
                     self.opener = opener
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._cache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, '')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._aggressivemergedeltas = False
                     # Default used when the opener carries no options; without it the
                     # attribute would not exist at all and any later read of it
                     # would raise AttributeError.
                     self._lazydeltabase = False
                     self.index = []
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Mapping of full node to revision integer.
                     self._nodecache = {nullid: nullrev}
                     self._nodepos = None
                     self._compengine = 'zlib'

                     v = REVLOG_DEFAULT_VERSION
                     opts = getattr(opener, 'options', None)
                     if opts is not None:
                         if 'revlogv1' in opts:
                             if 'generaldelta' in opts:
                                 v |= REVLOGGENERALDELTA
                         else:
                             # no 'revlogv1' option: fall back to format 0, no flags
                             v = 0
                         if 'chunkcachesize' in opts:
                             self._chunkcachesize = opts['chunkcachesize']
                         if 'maxchainlen' in opts:
                             self._maxchainlen = opts['maxchainlen']
                         if 'aggressivemergedeltas' in opts:
                             self._aggressivemergedeltas = opts['aggressivemergedeltas']
                         self._lazydeltabase = bool(opts.get('lazydeltabase', False))
                         if 'compengine' in opts:
                             self._compengine = opts['compengine']

                     if self._chunkcachesize <= 0:
                         raise RevlogError(_('revlog chunk cache size %r is not greater '
                                             'than 0') % self._chunkcachesize)
                     elif self._chunkcachesize & (self._chunkcachesize - 1):
                         # x & (x - 1) is non-zero unless x has exactly one bit set
                         raise RevlogError(_('revlog chunk cache size %r is not a power '
                                             'of 2') % self._chunkcachesize)

                     indexdata = ''
                     self._initempty = True
                     try:
                         f = self.opener(self.indexfile)
                         try:
                             indexdata = f.read()
                         finally:
                             # close the handle even when the read fails
                             f.close()
                         if len(indexdata) > 0:
                             # an existing index overrides the version computed above
                             v = struct.unpack(versionformat, indexdata[:4])[0]
                             self._initempty = False
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise

                     self.version = v
                     self._inline = v & REVLOGNGINLINEDATA
                     self._generaldelta = v & REVLOGGENERALDELTA
                     # high bits are feature flags, low 16 bits the format number
                     flags = v & ~0xFFFF
                     fmt = v & 0xFFFF
                     if fmt == REVLOGV0 and flags:
                         raise RevlogError(_("index %s unknown flags %#04x for format v0")
                                           % (self.indexfile, flags >> 16))
                     elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
                         raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                                           % (self.indexfile, flags >> 16))
                     elif fmt > REVLOGNG:
                         raise RevlogError(_("index %s unknown format %d")
                                           % (self.indexfile, fmt))

                     self.storedeltachains = True

                     self._io = revlogio()
                     if self.version == REVLOGV0:
                         self._io = revlogoldio()
                     try:
                         d = self._io.parseindex(indexdata, self._inline)
                     except (ValueError, IndexError):
                         raise RevlogError(_("index %s is corrupted") % (self.indexfile))
                     self.index, nodemap, self._chunkcache = d
                     if nodemap is not None:
                         self.nodemap = self._nodecache = nodemap
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = {}
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def _compressor(self):
                     """Compressor obtained from the configured compression engine.

                     The engine is looked up in ``util.compengines`` under
                     ``self._compengine`` and asked for its ``revlogcompressor()``.
                     """
                     engine = util.compengines[self._compengine]
                     return engine.revlogcompressor()
                 def tip(self):
                     """Return the node of the entry at index position ``len(index) - 2``."""
                     lastrev = len(self.index) - 2
                     return self.node(lastrev)
                 def __contains__(self, rev):
                     """Return True when ``rev`` is at least 0 and below ``len(self)``."""
                     if not 0 <= rev:
                         return False
                     return rev < len(self)
                 def __len__(self):
                     """Return one less than the number of entries in ``self.index``."""
                     entries = len(self.index)
                     return entries - 1
                 def __iter__(self):
                     """Iterate over the revision numbers ``0 .. len(self) - 1``."""
                     count = len(self)
                     return iter(xrange(count))
                 def revs(self, start=0, stop=None):
                     """Return the revision numbers from ``start`` to ``stop``.

                     When ``stop`` is given it is included in the result, and the
                     range counts downwards if ``start`` is greater than ``stop``.
                     When ``stop`` is None the range counts upwards from ``start``
                     to the end of the revlog (``len(self)``, exclusive).
                     """
                     if stop is None:
                         return xrange(start, len(self), 1)
                     direction = -1 if start > stop else 1
                     # Push the bound one step further so that ``stop`` is included.
                     return xrange(start, stop + direction, direction)
                 @util.propertycache
                 def nodemap(self):
                     """Return ``self._nodecache`` after resolving the node of revision 0.

                     The lookup result is discarded; ``rev()`` is called only for
                     its effect on ``self._nodecache``.
                     """
                     firstnode = self.node(0)
                     self.rev(firstnode)
                     return self._nodecache
                 def hasnode(self, node):
                     """Tell whether ``node`` can be resolved by ``self.rev``.

                     Returns False when the lookup raises KeyError, True otherwise.
                     """
                     try:
                         self.rev(node)
                     except KeyError:
                         return False
                     else:
                         return True
                 def clearcaches(self):
                     """Reset the in-memory caches held on this object.

                     Clears the revision cache, the chain base cache, the chunk cache
                     and the partial-match cache.  The node cache is cleared through
                     its own ``clearcaches()`` when it has one; otherwise it is
                     replaced by a fresh mapping and the scan position used by
                     ``rev()`` is forgotten.
                     """
                     self._cache = None
                     self._chainbasecache.clear()
                     self._pcache = {}
                     self._chunkcache = (0, '')
                     try:
                         self._nodecache.clearcaches()
                     except AttributeError:
                         # No clearcaches() on the node cache: start over with a
                         # mapping that only knows the null node.
                         self._nodepos = None
                         self._nodecache = {nullid: nullrev}
                 def rev(self, node):
                     """Return the revision number of ``node``.

                     The node cache is consulted first.  When that raises KeyError,
                     the index is scanned backwards, starting at the position
                     remembered in ``self._nodepos`` (or at ``len(index) - 2`` when
                     none is remembered); every node met on the way is recorded in
                     the cache.  Raises LookupError when the node cannot be found.
                     """
                     try:
                         return self._nodecache[node]
                     except TypeError:
                         raise
                     except RevlogError:
                         # parsers.c radix tree lookup failed
                         raise LookupError(node, self.indexfile, _('no node'))
                     except KeyError:
                         # pure python cache lookup failed
                         cache = self._nodecache
                         entries = self.index
                         pos = self._nodepos
                         if pos is None:
                             pos = len(entries) - 2
                         for r in xrange(pos, -1, -1):
                             candidate = entries[r][7]
                             cache[candidate] = r
                             if candidate == node:
                                 # Remember where to resume the next scan.
                                 self._nodepos = r - 1
                                 return r
                         raise LookupError(node, self.indexfile, _('no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     """Return field 0 of the entry for ``rev`` shifted right by 16 bits."""
                     packed = self.index[rev][0]
                     return int(packed >> 16)
                 def flags(self, rev):
                     """Return the low 16 bits of field 0 of the entry for ``rev``."""
                     packed = self.index[rev][0]
                     return packed & 0xFFFF
                 def length(self, rev):
                     """Return the length stored in field 1 of the entry for ``rev``."""
                     entry = self.index[rev]
                     return entry[1]
                 def rawsize(self, rev):
                     """Return the length of the uncompressed text for ``rev``.

                     The size recorded in field 2 of the index entry is returned
                     when it is non-negative; otherwise the revision text is
                     fetched with ``self.revision`` and measured.
                     """
                     recorded = self.index[rev][2]
                     if recorded >= 0:
                         return recorded
                     return len(self.revision(rev))
                 # ``size`` is bound to the same function as ``rawsize``.
                 size = rawsize
                 def chainbase(self, rev):
                     """Return the revision at the base of the delta chain of ``rev``.

                     Field 3 of each index entry is followed until an entry that
                     names itself as its base is reached.  The answer is memoized in
                     ``self._chainbasecache`` under the revision that was asked for.
                     """
                     base = self._chainbasecache.get(rev)
                     if base is not None:
                         return base
                     index = self.index
                     # Walk with a separate variable: ``rev`` must keep the requested
                     # revision so the result is cached under the right key.
                     iterrev = rev
                     base = index[iterrev][3]
                     while base != iterrev:
                         iterrev = base
                         base = index[iterrev][3]
                     self._chainbasecache[rev] = base
                     return base
                 def linkrev(self, rev):
                     """Return the linkrev stored in field 4 of the entry for ``rev``."""
                     entry = self.index[rev]
                     return entry[4]
                 def parentrevs(self, rev):
                     """Return fields 5 and 6 (the parent revisions) of the entry for ``rev``."""
                     entry = self.index[rev]
                     return entry[5:7]
                 def node(self, rev):
                     """Return the node stored in field 7 of the entry for ``rev``."""
                     entry = self.index[rev]
                     return entry[7]
                 # Derived from index values.
                 def end(self, rev):
                     """Return ``start(rev) + length(rev)``."""
                     offset = self.start(rev)
                     return offset + self.length(rev)
                 def parents(self, node):
                     """Return the two parent nodes of ``node`` as a tuple.

                     The parent revisions (fields 5 and 6 of the entry) are mapped
                     to nodes directly through the index.
                     """
                     index = self.index
                     entry = index[self.rev(node)]
                     first = index[entry[5]][7]
                     second = index[entry[6]][7]
                     return first, second
                 def chainlen(self, rev):
                     """Return the first item of ``self._chaininfo(rev)``."""
                     info = self._chaininfo(rev)
                     return info[0]
                 def _chaininfo(self, rev):
                     """Return ``(chain length, summed compressed length)`` for ``rev``.

                     The chain is walked from ``rev`` towards its base: through
                     field 3 of each entry when general delta is enabled, through
                     the previous revision otherwise.  Field 1 of every entry met is
                     added to the sum.  Results are memoized in
                     ``self._chaininfocache``, and a cached value met on the way
                     short-circuits the rest of the walk.
                     """
                     cache = self._chaininfocache
                     if rev in cache:
                         return cache[rev]
                     index = self.index
                     followbase = self._generaldelta
                     cur = rev
                     entry = index[cur]
                     steps = 0
                     total = 0
                     while cur != entry[3]:
                         steps += 1
                         total += entry[1]
                         if followbase:
                             cur = entry[3]
                         else:
                             cur -= 1
                         if cur in cache:
                             known = cache[cur]
                             steps += known[0]
                             total += known[1]
                             break
                         entry = index[cur]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         total += entry[1]
                     result = (steps, total)
                     cache[rev] = result
                     return result
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.

                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.

                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.  When ``stoprev`` is hit it is left out of
                     ``chain``; otherwise the chain base is included.
                     """
                     # Locals avoid repeated attribute lookups in the loop below.
                     index = self.index
                     followbase = self._generaldelta
                     collected = []
                     cur = rev
                     entry = index[cur]
                     while cur != entry[3] and cur != stoprev:
                         collected.append(cur)
                         if followbase:
                             cur = entry[3]
                         else:
                             cur -= 1
                         entry = index[cur]
                     stopped = cur == stoprev
                     if not stopped:
                         collected.append(cur)
                     collected.reverse()
                     return collected, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse topological order.

                     Does not generate revs lower than stoprev.

                     This delegates to ancestor.lazyancestors; see its documentation
                     for more details."""
                     parentfn = self.parentrevs
                     return ancestor.lazyancestors(parentfn, revs, stoprev=stoprev,
                                                   inclusive=inclusive)
                 def descendants(self, revs):
                     """Generate the descendants of 'revs' in revision order.

                     Yield a sequence of revision numbers starting with a child of
                     some rev in revs, i.e., each revision is *not* considered a
                     descendant of itself.  Results are ordered by revision number (a
                     topological sort).

                     When the lowest rev in 'revs' is nullrev, every revision of the
                     revlog is yielded."""
                     lowest = min(revs)
                     if lowest == nullrev:
                         for r in self:
                             yield r
                         return
                     known = set(revs)
                     for r in self.revs(start=lowest + 1):
                         # A revision qualifies as soon as one non-null parent is
                         # already known to be in the set.
                         if any(p != nullrev and p in known
                                for p in self.parentrevs(r)):
                             known.add(r)
                             yield r
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:

                       ::common, (::heads) - (::common)

                     The first item is a lazy set-like object of revision numbers
                     (it also contains nullrev).  The second item is a list of
                     nodes sorted by revision number, meaning it is topologically
                     sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     commonrevs = [self.rev(n) for n in common]
                     tiprevs = [self.rev(n) for n in heads]

                     class lazyset(object):
                         """Explicitly added values layered over a lazy container."""
                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues
                         def __contains__(self, value):
                             if value in self.addedvalues:
                                 return True
                             return value in self.lazyvalues
                         def __iter__(self):
                             explicit = self.addedvalues
                             for r in explicit:
                                 yield r
                             for r in self.lazyvalues:
                                 if r not in explicit:
                                     yield r
                         def add(self, value):
                             self.addedvalues.add(value)
                         def update(self, values):
                             self.addedvalues.update(values)

                     # we want the ancestors, but inclusive
                     has = lazyset(self.ancestors(commonrevs))
                     has.add(nullrev)
                     has.update(commonrevs)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     pending = collections.deque(r for r in tiprevs if r not in has)
                     while pending:
                         r = pending.popleft()
                         if r in missing:
                             continue
                         missing.add(r)
                         for p in self.parentrevs(r):
                             if p not in has:
                                 pending.append(p)
                     return has, [self.node(r) for r in sorted(missing)]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object.

                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     bases = [nullrev] if common is None else common
                     return ancestor.incrementalmissingancestors(self.parentrevs, bases)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.

                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The list is sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's head revisions.  If common is
                     not supplied, uses nullrev."""
                     bases = [nullrev] if common is None else common
                     tips = self.headrevs() if heads is None else heads
                     tracker = self.incrementalmissingrevs(common=bases)
                     return tracker.missingancestors(tips)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.

                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The list is sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [nullid]
                     if heads is None:
                         heads = self.heads()
                     # The incremental helper works on revision numbers, not nodes.
                     commonrevs = [self.rev(n) for n in common]
                     tiprevs = [self.rev(n) for n in heads]
                     tracker = self.incrementalmissingrevs(common=commonrevs)
                     missingrevs = tracker.missingancestors(tiprevs)
                     return [self.node(r) for r in missingrevs]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.

                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:

                       1. N is a descendant of some node in 'roots'
                       2. N is an ancestor of some node in 'heads'

                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.

                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.

                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads.

                     ([], [], []) is returned when 'roots' or 'heads' is supplied but
                     empty, when no ancestor of 'heads' is found at or above the
                     lowest root revision, or when none of 'roots' is an ancestor of
                     'heads'."""
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [nullid] # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return ([self.node(r) for r in self], [nullid], list(self.heads()))
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n) # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update([p for p in self.parents(n) if
                                                        p != nullid])
                                 elif n in heads: # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # will start being marked as descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the descendants set with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     # Keep only the heads that were flagged True above.
                     heads = [head for head, flag in heads.iteritems() if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self):
                     """Return the head revisions.

                     The index's own ``headrevs()`` is used when available; on
                     AttributeError the work is done by ``self._headrevs()``.
                     """
                     try:
                         found = self.index.headrevs()
                     except AttributeError:
                         found = self._headrevs()
                     return found
                 def computephases(self, roots):
                     """Return ``self.index.computephasesmapsets(roots)``."""
                     index = self.index
                     return index.computephasesmapsets(roots)
                 def _headrevs(self):
                     """Compute the head revisions by scanning the index in Python.

                     Returns ``[nullrev]`` for an empty revlog, otherwise the list of
                     revisions that are not a parent of any revision iterated over.
                     """
                     total = len(self)
                     if not total:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     # NOTE(review): the extra trailing slot presumably absorbs the
                     # writes made for nullrev parents -- confirm.
                     marks = [0] * (total + 1)
                     index = self.index
                     for r in self:
                         marks[r] = 1  # r may be a head
                         entry = index[r]
                         # ... but its parents are not
                         marks[entry[5]] = 0
                         marks[entry[6]] = 0
                     return [r for r, mark in enumerate(marks) if mark]
                 def heads(self, start=None, stop=None):
                     """Return the list of all nodes that have no children.

                     If start is specified, only heads that are descendants of
                     start will be returned.

                     If stop is specified, it will consider all the revs from stop
                     as if they had no children.

                     With neither argument, the result comes from ``headrevs()``,
                     or is ``[nullid]`` when the revlog is empty.
                     """
                     if start is None and stop is None:
                         if len(self):
                             return [self.node(r) for r in self.headrevs()]
                         return [nullid]
                     if start is None:
                         start = nullid
                     if stop is None:
                         stop = []
                     stoprevs = set([self.rev(n) for n in stop])
                     startrev = self.rev(start)
                     seen = set([startrev])
                     found = set([startrev])
                     parentrevs = self.parentrevs
                     for r in self.revs(start=startrev + 1):
                         for parent in parentrevs(r):
                             if parent in seen:
                                 # Revisions listed in 'stop' are heads themselves but
                                 # nothing is explored beyond them.
                                 if r not in stoprevs:
                                     seen.add(r)
                                 found.add(r)
                             if parent in found and parent not in stoprevs:
                                 found.remove(parent)
                     return [self.node(r) for r in found]
                 def children(self, node):
                     """Find the children of a given node.

                     Revisions without any non-null parent count as children of
                     the null revision.
                     """
                     found = []
                     target = self.rev(node)
                     for r in self.revs(start=target + 1):
                         realparents = [pr for pr in self.parentrevs(r) if pr != nullrev]
                         if not realparents:
                             if target == nullrev:
                                 found.append(self.node(r))
                             continue
                         for pr in realparents:
                             if pr == target:
                                 found.append(self.node(r))
                     return found
                 def descendant(self, start, end):
                     """Tell whether revision ``end`` is a descendant of revision ``start``.

                     Always True when ``start`` is nullrev.  Otherwise the output of
                     ``descendants([start])`` is scanned, giving up as soon as a
                     revision greater than ``end`` shows up.
                     """
                     if start == nullrev:
                         return True
                     for r in self.descendants([start]):
                         if r == end:
                             return True
                         if r > end:
                             return False
                     return False
                 def commonancestorsheads(self, a, b):
                     """Calculate all the heads of the common ancestors of nodes a and b.

                     Returns a list of nodes.
                     """
                     reva, revb = self.rev(a), self.rev(b)
                     try:
                         found = self.index.commonancestorsheads(reva, revb)
                     except (AttributeError, OverflowError):
                         # The index could not answer (C implementation failed):
                         # use the ancestor module instead.
                         found = ancestor.commonancestorsheads(self.parentrevs,
                                                               reva, revb)
                     return pycompat.maplist(self.node, found)
                 def isancestor(self, a, b):
                     """Return True if node a is an ancestor of node b.

                     The implementation of this is trivial but the use of
                     commonancestorsheads is not."""
                     commonheads = self.commonancestorsheads(a, b)
                     return a in commonheads
                 def ancestor(self, a, b):
                     """Calculate the "best" common ancestor of nodes a and b.

                     Returns nullid when no common ancestor is found.
                     """
                     reva, revb = self.rev(a), self.rev(b)
                     try:
                         candidates = self.index.ancestors(reva, revb)
                     except (AttributeError, OverflowError):
                         candidates = ancestor.ancestors(self.parentrevs, reva, revb)
                     if not candidates:
                         return nullid
                     # choose a consistent winner when there's a tie
                     return min(map(self.node, candidates))
                 def _match(self, id):
                     """Resolve ``id`` as an exact identifier and return its node.

                     Tried in order: an int revision number, a 20-byte binary node,
                     the string form of a revision number (negative values count
                     from the end), and a 40-character hex node.  Returns None when
                     nothing matches.
                     """
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == 20:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             self.rev(id) # quick search the index
                             return id
                         except LookupError:
                             pass # may be partial hex id
                     try:
                         # str(rev)
                         num = int(id)
                         if str(num) != id:
                             raise ValueError
                         if num < 0:
                             num = len(self) + num
                         if not 0 <= num < len(self):
                             raise ValueError
                         return self.node(num)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 40:
                         try:
                             # a full hex nodeid?
                             binnode = bin(id)
                             self.rev(binnode)
                             return binnode
                         except (TypeError, LookupError):
                             pass
                 def _partialmatch(self, id):
                     """Resolve a hex prefix ``id`` to a node.

                     The index's ``partialmatch`` is tried first.  When it is not
                     usable, the partial-match cache is consulted and then every
                     index entry is compared against the prefix.  Returns the node,
                     or None when nothing matches; raises LookupError when the
                     prefix is ambiguous.
                     """
                     try:
                         candidate = self.index.partialmatch(id)
                         if candidate and self.hasnode(candidate):
                             return candidate
                         return None
                     except RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             raise LookupError(id, self.indexfile,
                                               _('ambiguous identifier'))
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) < 40:
                         try:
                             # hex(node)[:...]
                             half = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[:half * 2])
                             coarse = [e[7] for e in self.index
                                       if e[7].startswith(prefix)]
                             matches = [n for n in coarse
                                        if hex(n).startswith(id) and self.hasnode(n)]
                             if not matches:
                                 return None
                             if len(matches) == 1:
                                 self._pcache[id] = matches[0]
                                 return matches[0]
                             raise LookupError(id, self.indexfile,
                                               _('ambiguous identifier'))
                         except TypeError:
                             pass
                 def lookup(self, id):
                     """Locate a node from an identifier.

                     ``id`` may be:
                         - revision number or str(revision number)
                         - nodeid or subset of hex nodeid

                     Returns the node; raises LookupError when nothing matches.
                     """
                     node = self._match(id)
                     if node is None:
                         # no exact match: fall back to prefix matching
                         node = self._partialmatch(id)
                         if not node:
                             raise LookupError(id, self.indexfile, _('no match found'))
                     return node
                 def cmp(self, node, text):
                     """Compare ``text`` with the revision stored as ``node``.

                     The hash of ``text`` with the parents of ``node`` is recomputed and
                     checked against ``node``. Returns True if text is different than
                     what is stored.
                     """
                     parent1, parent2 = self.parents(node)
                     computed = hash(text, parent1, parent2)
                     return computed != node
                 def _addchunk(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _loadchunk(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     Returns a str or buffer of raw byte data.
                     """
                     if df is not None:
                         closehandle = False
                     else:
                         if self._inline:
                             df = self.opener(self.indexfile)
                         else:
                             df = self.opener(self.datafile)
                         closehandle = True
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                                   - realoffset)
                     df.seek(realoffset)
                     d = df.read(reallength)
                     if closehandle:
                         df.close()
                     self._addchunk(realoffset, d)
                     if offset != realoffset or reallength != length:
                         return util.buffer(d, offset - realoffset, length)
                     return d
                 def _getchunk(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._loadchunk(offset, length, df=df)
                 def _chunkraw(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self._io.size
                         end += (endrev + 1) * self._io.size
                     length = end - start
                     return start, self._getchunk(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Obtain a single decompressed chunk for a revision.
                     Accepts an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will not
                     be preserved.
                     Returns a str holding uncompressed data for the requested revision.
                     """
                     return self.decompress(self._chunkraw(rev, rev, df=df)[1])
                 def _chunks(self, revs, df=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     """
                     if not revs:
                         return []
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self._io.size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     try:
                         offset, data = self._chunkraw(revs[0], revs[-1], df=df)
                     except OverflowError:
                         # issue4215 - we can't cache a run of chunks greater than
                         # 2G on Windows
                         return [self._chunk(rev, df=df) for rev in revs]
                     decomp = self.decompress
                     for rev in revs:
                         chunkstart = start(rev)
                         if inline:
                             chunkstart += (rev + 1) * iosize
                         chunklength = length(rev)
                         ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, '')
                 def deltaparent(self, rev):
                     """Return the revision that ``rev`` is stored as a delta against.

                     ``nullrev`` is returned when the index records ``rev`` as its own
                     base. Otherwise the recorded base is returned for generaldelta
                     revlogs, and ``rev - 1`` for all others.
                     """
                     recordedbase = self.index[rev][3]
                     if recordedbase == rev:
                         return nullrev
                     if self._generaldelta:
                         return recordedbase
                     return rev - 1
                 def revdiff(self, rev1, rev2):
                     """return or calculate a delta between two revisions

                     The delta calculated is in binary form and is intended to be written to
                     revlog data directly. So this function needs raw revision data.
                     """
                     if rev1 != nullrev and self.deltaparent(rev2) == rev1:
                         # rev2 is already stored as a delta against rev1: reuse it
                         return bytes(self._chunk(rev2))
                     return mdiff.textdiff(self.revision(rev1, raw=True),
                                           self.revision(rev2, raw=True))
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return an uncompressed revision of a given node or revision
                     number.

                     nodeorrev - an integer revision number, or a node
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.

                     Returns the text produced by ``_processflags`` for the 'read'
                     operation, or the empty string for the null node. The rawtext that
                     was rebuilt is remembered in ``self._cache`` as
                     ``(node, rev, rawtext)``.
                     """
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         # resolved lazily below, only if the cache cannot answer
                         rev = None
                     cachedrev = None
                     if node == nullid:
                         return ""
                     if self._cache:
                         if self._cache[0] == node:
                             # _cache only stores rawtext
                             if raw:
                                 return self._cache[2]
                         # let the delta chain walk stop at the cached revision
                         cachedrev = self._cache[1]
                     # look up what we need to read
                     rawtext = None
                     if rev is None:
                         rev = self.rev(node)
                     # NOTE(review): ``stopped`` presumably means the walk reached
                     # ``stoprev`` -- confirm against _deltachain()
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         # the cached rawtext is the base the chain is applied to
                         rawtext = self._cache[2]
                     # drop cache to save memory
                     self._cache = None
                     bins = self._chunks(chain, df=_df)
                     if rawtext is None:
                         # no cached base: the first chunk is the base text and the
                         # remaining chunks are the deltas
                         rawtext = bytes(bins[0])
                         bins = bins[1:]
                     rawtext = mdiff.patches(rawtext, bins)
                     text, validatehash = self._processflags(rawtext, self.flags(rev),
                                                             'read', raw=raw)
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     # cache the rawtext, not the processed text
                     self._cache = (node, rev, rawtext)
                     return text
                 def hash(self, text, p1, p2):
                     """Compute the node hash for ``text`` with parents ``p1`` and ``p2``.

                     This delegates to the module-level ``hash`` function; it exists as
                     a method so that subclasses can replace the hash as needed.
                     """
                     nodehash = hash(text, p1, p2)
                     return nodehash
                 def _processflags(self, text, flags, operation, raw=False):
                     """Apply the transforms of registered flag processors to ``text``.

                     ``text`` - the revision data to process
                     ``flags`` - the revision flags
                     ``operation`` - the operation being performed (read or write)
                     ``raw`` - an optional argument describing if the raw transform should be
                     applied.

                     Flags are handled in the order defined by REVIDX_FLAGS_ORDER (in
                     reverse for the 'write' operation), applying the processor
                     registered for every flag that is set. The order of flags defined
                     in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

                     Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
                     processed text and ``validatehash`` is a bool indicating whether the
                     returned text should be checked for hash integrity.

                     Raises ProgrammingError for an unknown ``operation`` and RevlogError
                     for unknown flags or a flag without a registered processor.

                     Note: If the ``raw`` argument is set, it has precedence over the
                     operation and will only update the value of ``validatehash``.
                     """
                     if operation not in ('read', 'write'):
                         raise ProgrammingError(_("invalid '%s' operation ") % (operation))
                     # Check all flags are known.
                     unknownflags = flags & ~REVIDX_KNOWN_FLAGS
                     if unknownflags:
                         raise RevlogError(_("incompatible revision flag '%#x'") %
                                           (unknownflags))
                     # Writes undo reads, so non-commutative transforms have to run
                     # in the opposite order.
                     if operation == 'write':
                         flagorder = reversed(REVIDX_FLAGS_ORDER)
                     else:
                         flagorder = REVIDX_FLAGS_ORDER
                     validatehash = True
                     for flag in flagorder:
                         if not flag & flags:
                             continue
                         if flag not in _flagprocessors:
                             message = _("missing processor for flag '%#x'") % (flag)
                             raise RevlogError(message)
                         vhash = True
                         processor = _flagprocessors[flag]
                         if processor is not None:
                             readtransform, writetransform, rawtransform = processor
                             if raw:
                                 # raw mode leaves the text alone
                                 vhash = rawtransform(self, text)
                             elif operation == 'read':
                                 text, vhash = readtransform(self, text)
                             else: # write operation
                                 text, vhash = writetransform(self, text)
                         validatehash = validatehash and vhash
                     return text, validatehash
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Verify that ``node`` is the hash of ``text`` and its parents.

                     The parents are looked up from ``node`` when neither ``p1`` nor
                     ``p2`` is given. Raises RevlogError on a mismatch, naming ``rev``
                     if provided and the short hex node otherwise.

                     Available as a function so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     if p1 is None and p2 is None:
                         p1, p2 = self.parents(node)
                     if node == self.hash(text, p1, p2):
                         return
                     if rev is None:
                         revornode = templatefilters.short(hex(node))
                     else:
                         revornode = rev
                     raise RevlogError(_("integrity check failed on %s:%s")
                                       % (self.indexfile, revornode))
                 def checkinlinesize(self, tr, fp=None):
                     """Check if the revlog is too big for inline and convert if so.

                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.

                     tr - the transaction; it must already track the index file
                     fp - an optional open file handle, flushed and closed before the
                     data file is written

                     Raises RevlogError when the index file is not part of ``tr``.
                     """
                     # Nothing to do unless the revlog is inline and its data has
                     # reached _maxinline.
                     # NOTE(review): index -2 presumably addresses the last real
                     # revision -- confirm against the index layout.
                     if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
                         return
                     trinfo = tr.find(self.indexfile)
                     if trinfo is None:
                         raise RevlogError(_("%s not found in the transaction")
                                           % self.indexfile)
                     # NOTE(review): trinfo[2] is treated as a revision number below
                     # -- confirm against transaction.find()
                     trindex = trinfo[2]
                     if trindex is not None:
                         dataoff = self.start(trindex)
                     else:
                         # revlog was stripped at start of transaction, use all leftover data
                         trindex = len(self) - 1
                         dataoff = self.end(-2)
                     # register the new data file with the transaction
                     tr.add(self.datafile, dataoff)
                     if fp:
                         fp.flush()
                         fp.close()
                     # copy the raw chunk of every revision into the data file; this
                     # runs while self._inline is still set
                     df = self.opener(self.datafile, 'w')
                     try:
                         for r in self:
                             df.write(self._chunkraw(r, r)[1])
                     finally:
                         df.close()
                     fp = self.opener(self.indexfile, 'w', atomictemp=True,
                                      checkambig=self._checkambig)
                     # clear the inline bit, then write every index entry again
                     self.version &= ~(REVLOGNGINLINEDATA)
                     self._inline = False
                     for i in self:
                         e = self._io.packentry(self.index[i], self.node, self.version, i)
                         fp.write(e)
                     # if we don't call close, the temp file will never replace the
                     # real index
                     fp.close()
                     tr.replace(self.indexfile, trindex * self._io.size)
                     # cached chunk data was read at inline offsets; discard it
                     self._chunkclear()
                 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                                 node=None, flags=REVIDX_DEFAULT_FLAGS):
                     """add a revision to the log

                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision

                     Returns the node of the revision; a node that is already in the
                     nodemap is returned without writing anything. Raises RevlogError
                     for a null linkrev or an oversized revision.
                     """
                     if link == nullrev:
                         raise RevlogError(_("attempted to add linkrev -1 to %s")
                                           % self.indexfile)
                     if flags:
                         # with flags set, the node is derived from the text as
                         # given, before the write transforms run
                         node = node or self.hash(text, p1, p2)
                     rawtext, validatehash = self._processflags(text, flags, 'write')
                     if rawtext != text:
                         # a flag processor rewrote the data, so a provided
                         # cachedelta no longer applies
                         cachedelta = None
                     rawsize = len(rawtext)
                     if rawsize > _maxentrysize:
                         raise RevlogError(
                             _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                             % (self.indexfile, rawsize))
                     node = node or self.hash(rawtext, p1, p2)
                     if node in self.nodemap:
                         return node
                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)
                     # inline revlogs have no separate data file to open
                     if self._inline:
                         dfh = None
                     else:
                         dfh = self.opener(self.datafile, "a+")
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     try:
                         return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                                  flags, cachedelta, ifh, dfh)
                     finally:
                         if dfh:
                             dfh.close()
                         ifh.close()
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data.

                     Returns a ``(header, data)`` 2-tuple. The header is 'u' only for
                     data stored uncompressed that does not begin with a NUL byte;
                     otherwise it is empty.
                     """
                     if not data:
                         return '', data
                     packed = self._compressor.compress(data)
                     if packed:
                         # The revlog compressor added the header in the returned data.
                         return '', packed
                     # stored as-is: a leading NUL already marks the chunk as raw
                     header = '' if data[0:1] == '\0' else 'u'
                     return header, data
                 def decompress(self, data):
                     """Decompress a revlog chunk.

                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.

                     Empty data is returned unchanged. Raises RevlogError when zlib
                     decompression fails or when no compression engine is registered
                     for the header.
                     """
                     if not data:
                         return data
                     # Revlogs are read far more often than they are written, and many
                     # chunks decompress in microseconds, so the order of the checks
                     # below is deliberate:
                     #
                     # 1) most chunks are compressed rather than raw inline data;
                     # 2) decompressing *any* data is likely at least 10x slower than
                     #    returning raw inline data;
                     # 3) common, officially supported compression engines come first.
                     #
                     # Hence the inline header tests run before the compengines lookup.
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks, which matters for changelog and manifest reads.
                     header = data[0:1]
                     if header == 'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise RevlogError(_('revlog decompress error: %s') % str(e))
                     # '\0' is more common than 'u' so it goes first.
                     if header == '\0':
                         return data
                     if header == 'u':
                         return util.buffer(data, 1)
                     try:
                         engineimpl = self._decompressors[header]
                     except KeyError:
                         # first chunk with this header: look the engine up and
                         # remember its decompressor
                         try:
                             engine = util.compengines.forrevlogheader(header)
                             engineimpl = engine.revlogcompressor()
                             self._decompressors[header] = engineimpl
                         except KeyError:
                             raise RevlogError(_('unknown compression type %r') % header)
                     return engineimpl.decompress(data)
                 def _isgooddelta(self, d, textlen):
                     """Returns True if the given delta is good. Good means that it is within
                     the disk span, disk size, and chain length bounds that we know to be
                     performant."""
                     if d is None:
                         return False
                     # - 'dist' is the distance from the base revision -- bounding it limits
                     #   the amount of I/O we need to do.
                     # - 'compresseddeltalen' is the sum of the total size of deltas we need
                     #   to apply -- bounding it limits the amount of CPU we consume.
                     dist, l, data, base, chainbase, chainlen, compresseddeltalen = d
                     if (dist > textlen * 4 or l > textlen or
                         compresseddeltalen > textlen * 2 or
                         (self._maxchainlen and chainlen > self._maxchainlen)):
                         return False
                     return True
                 def _addrevision(self, node, text, transaction, link, p1, p2, flags,
                                  cachedelta, ifh, dfh, alwayscache=False, raw=False):
                     """internal function to add revisions to the log
                     see addrevision for argument descriptions.
                     invariants:
                     - text is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     - raw is optional; if set to True, it indicates the revision data is to
                       be treated by _processflags() as raw. It is usually set by changegroup
                       generation and debug commands.
                     """
                     btext = [text]
                     def buildtext():
                         if btext[0] is not None:
                             return btext[0]
                         baserev = cachedelta[0]
                         delta = cachedelta[1]
                         # special case deltas which replace entire base; no need to decode
                         # base revision. this neatly avoids censored bases, which throw when
                         # they're decoded.
                         hlen = struct.calcsize(">lll")
                         if delta[:hlen] == mdiff.replacediffheader(self.rawsize(baserev),
                                                                    len(delta) - hlen):
                             btext[0] = delta[hlen:]
                         else:
                             if self._inline:
                                 fh = ifh
                             else:
                                 fh = dfh
                             basetext = self.revision(baserev, _df=fh, raw=raw)
                             btext[0] = mdiff.patch(basetext, delta)
                         try:
                             res = self._processflags(btext[0], flags, 'read', raw=raw)
                             btext[0], validatehash = res
                             if validatehash:
                                 self.checkhash(btext[0], node, p1=p1, p2=p2)
                             if flags & REVIDX_ISCENSORED:
                                 raise RevlogError(_('node %s is not censored') % node)
                         except CensoredNodeError:
                             # must pass the censored index flag to add censored revisions
                             if not flags & REVIDX_ISCENSORED:
                                 raise
                         return btext[0]
                     def builddelta(rev):
                         # can we use the cached delta?
                         if cachedelta and cachedelta[0] == rev:
                             delta = cachedelta[1]
                         else:
                             t = buildtext()
                             if self.iscensored(rev):
                                 # deltas based on a censored revision must replace the
                                 # full content in one patch, so delta works everywhere
                                 header = mdiff.replacediffheader(self.rawsize(rev), len(t))
                                 delta = header + t
                             else:
                                 if self._inline:
                                     fh = ifh
                                 else:
                                     fh = dfh
                                 ptext = self.revision(rev, _df=fh, raw=True)
                                 delta = mdiff.textdiff(ptext, t)
                         header, data = self.compress(delta)
                         deltalen = len(header) + len(data)
                         chainbase = self.chainbase(rev)
                         dist = deltalen + offset - self.start(chainbase)
                         if self._generaldelta:
                             base = rev
                         else:
                             base = chainbase
                         chainlen, compresseddeltalen = self._chaininfo(rev)
                         chainlen += 1
                         compresseddeltalen += deltalen
                         return (dist, deltalen, (header, data), base,
                                 chainbase, chainlen, compresseddeltalen)
                     curr = len(self)
                     prev = curr - 1
                     offset = self.end(prev)
                     delta = None
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if text is None:
                         textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                                     cachedelta[1])
                     else:
                         textlen = len(text)
                     # should we try to build a delta?
                     if prev != nullrev and self.storedeltachains:
                         tested = set()
                         # This condition is true most of the time when processing
                         # changegroup data into a generaldelta repo. The only time it
                         # isn't true is if this is the first revision in a delta chain
                         # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
                         if cachedelta and self._generaldelta and self._lazydeltabase:
                             # Assume what we received from the server is a good choice
                             # build delta will reuse the cache
                             candidatedelta = builddelta(cachedelta[0])
                             tested.add(cachedelta[0])
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                         if delta is None and self._generaldelta:
                             # exclude already lazy tested base if any
                             parents = [p for p in (p1r, p2r)
                                        if p != nullrev and p not in tested]
                             if parents and not self._aggressivemergedeltas:
                                 # Pick whichever parent is closer to us (to minimize the
                                 # chance of having to build a fulltext).
                                 parents = [max(parents)]
                             tested.update(parents)
                             pdeltas = []
                             for p in parents:
                                 pd = builddelta(p)
                                 if self._isgooddelta(pd, textlen):
                                     pdeltas.append(pd)
                             if pdeltas:
                                 delta = min(pdeltas, key=lambda x: x[1])
                         if delta is None and prev not in tested:
                             # other approach failed try against prev to hopefully save us a
                             # fulltext.
                             candidatedelta = builddelta(prev)
                             if self._isgooddelta(candidatedelta, textlen):
                                 delta = candidatedelta
                     if delta is not None:
                         dist, l, data, base, chainbase, chainlen, compresseddeltalen = delta
                     else:
                         text = buildtext()
                         data = self.compress(text)
                         l = len(data[1]) + len(data[0])
                         base = chainbase = curr
                     e = (offset_type(offset, flags), l, textlen,
                          base, link, p1r, p2r, node)
                     self.index.insert(-1, e)
                     self.nodemap[node] = curr
                     entry = self._io.packentry(e, self.node, self.version, curr)
                     self._writeentry(transaction, ifh, dfh, entry, data, link, offset)
                     if alwayscache and text is None:
                         text = buildtext()
                     if type(text) == str: # only accept immutable objects
                         self._cache = (node, curr, text)
                     self._chainbasecache[curr] = chainbase
                     return node
                 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3.
                     ifh.seek(0, os.SEEK_END)
                     if dfh:
                         dfh.seek(0, os.SEEK_END)
                     curr = len(self) - 1
                     if not self._inline:
                         transaction.add(self.datafile, offset)
                         transaction.add(self.indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         ifh.write(entry)
                     else:
                         offset += curr * self._io.size
                         transaction.add(self.indexfile, offset, curr)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         self.checkinlinesize(transaction, ifh)
                 def addgroup(self, cg, linkmapper, transaction, addrevisioncb=None):
                     """Add a group of delta-encoded revisions read from ``cg``.

                     ``cg.deltachunk(chain)`` is called repeatedly, ``chain`` being the
                     node handled on the previous iteration (``None`` the first time),
                     until it returns an empty dict. Every chunk is a dict providing
                     ``node``, ``p1``, ``p2``, ``cs``, ``deltabase``, ``delta`` and
                     ``flags``. Both parents and the delta base of a chunk must
                     already be known to this revlog. A chunk whose node is already
                     stored is skipped.

                     ``linkmapper`` is called with the chunk's ``cs`` value; its result
                     is passed on as the link of the new revision.

                     If ``addrevisioncb`` is defined, it will be called with arguments of
                     this revlog and the node that was added.

                     Returns the list of every node seen in the group, in order,
                     including nodes that were already present.

                     Raises ``LookupError`` when a parent or the delta base is unknown
                     and ``error.CensoredBaseError`` when the delta base is censored
                     and the delta is not a single full-replacement patch.
                     """
                     # nodes encountered so far, returned to the caller
                     content = []
                     node = None
                     # r: number of revisions before the group is added;
                     # end: value of self.end() for the last of them (0 when empty)
                     r = len(self)
                     end = 0
                     if r:
                         end = self.end(r - 1)
                     ifh = self.opener(self.indexfile, "a+", checkambig=self._checkambig)
                     isize = r * self._io.size
                     # Register the current file sizes with the transaction before
                     # writing (presumably so a rollback can truncate back to them --
                     # confirm against the transaction implementation).
                     if self._inline:
                         transaction.add(self.indexfile, end + isize, r)
                         dfh = None
                     else:
                         transaction.add(self.indexfile, isize, r)
                         transaction.add(self.datafile, end)
                         dfh = self.opener(self.datafile, "a+")
                     # handed to _peek_iscensored() so it can flush pending writes
                     def flush():
                         if dfh:
                             dfh.flush()
                         ifh.flush()
                     try:
                         # loop through our set of deltas
                         chain = None
                         # the {} sentinel ends the iteration on the first empty chunk
                         for chunkdata in iter(lambda: cg.deltachunk(chain), {}):
                             node = chunkdata['node']
                             p1 = chunkdata['p1']
                             p2 = chunkdata['p2']
                             cs = chunkdata['cs']
                             deltabase = chunkdata['deltabase']
                             delta = chunkdata['delta']
                             flags = chunkdata['flags'] or REVIDX_DEFAULT_FLAGS
                             content.append(node)
                             link = linkmapper(cs)
                             if node in self.nodemap:
                                 # this can happen if two branches make the same change
                                 chain = node
                                 continue
                             for p in (p1, p2):
                                 if p not in self.nodemap:
                                     raise LookupError(p, self.indexfile,
                                                       _('unknown parent'))
                             if deltabase not in self.nodemap:
                                 raise LookupError(deltabase, self.indexfile,
                                                   _('unknown delta base'))
                             baserev = self.rev(deltabase)
                             if baserev != nullrev and self.iscensored(baserev):
                                 # if base is censored, delta must be full replacement in a
                                 # single patch operation
                                 hlen = struct.calcsize(">lll")
                                 oldlen = self.rawsize(baserev)
                                 newlen = len(delta) - hlen
                                 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                                     raise error.CensoredBaseError(self.indexfile,
                                                                   self.node(baserev))
                             # only probe for censorship when no flags were supplied
                             if not flags and self._peek_iscensored(baserev, delta, flush):
                                 flags |= REVIDX_ISCENSORED
                             # We assume consumers of addrevisioncb will want to retrieve
                             # the added revision, which will require a call to
                             # revision(). revision() will fast path if there is a cache
                             # hit. So, we tell _addrevision() to always cache in this case.
                             # We're only using addgroup() in the context of changegroup
                             # generation so the revision data can always be handled as raw
                             # by the flagprocessor.
                             chain = self._addrevision(node, None, transaction, link,
                                                       p1, p2, flags, (baserev, delta),
                                                       ifh, dfh,
                                                       alwayscache=bool(addrevisioncb),
                                                       raw=True)
                             if addrevisioncb:
                                 addrevisioncb(self, chain)
                             if not dfh and not self._inline:
                                 # addrevision switched from inline to conventional
                                 # reopen the index
                                 ifh.close()
                                 dfh = self.opener(self.datafile, "a+")
                                 ifh = self.opener(self.indexfile, "a+",
                                                   checkambig=self._checkambig)
                     finally:
                         # close whichever handles are current, even on error
                         if dfh:
                             dfh.close()
                         ifh.close()
                     return content
                 def iscensored(self, rev):
                     """Check if a file revision is censored.

                     This implementation ignores ``rev`` and always answers False.
                     """
                     # NOTE(review): presumably overridden by revlog flavours that
                     # support censoring -- confirm against the subclasses.
                     return False
                 def _peek_iscensored(self, baserev, delta, flush):
                     """Quickly check if a delta produces a censored revision.

                     This implementation ignores all of its arguments and always
                     answers False.
                     """
                     # NOTE(review): presumably overridden by revlog flavours that
                     # support censoring -- confirm against the subclasses.
                     return False
                 def getstrippoint(self, minlink):
                     """Locate the lowest rev that has to go in order to strip ``minlink``.

                     Returns a ``(strippoint, brokenrevs)`` tuple: ``strippoint`` is the
                     minimum rev that must be stripped and ``brokenrevs`` is the set of
                     revs at or above it whose linkrev is below ``minlink``, i.e. whose
                     linkrevs will be broken by this strip.
                     """
                     frontier = {}    # rev -> linkrev for revs not yet walked past
                     pending = set()  # linkrevs >= minlink still on the frontier
                     broken = set()
                     def track(rev):
                         # remember rev's linkrev and note it if it is large enough
                         lrev = self.linkrev(rev)
                         frontier[rev] = lrev
                         if lrev >= minlink:
                             pending.add(lrev)
                     cut = len(self)
                     for headrev in self.headrevs():
                         track(headrev)
                     # Walk down the rev graph starting from the heads. Revs are
                     # topologically sorted according to linkrev, so as soon as no
                     # linkrev on the frontier reaches minlink, no lower rev can
                     # reach it either and the walk can stop.
                     while pending:
                         cut -= 1
                         lrev = frontier.pop(cut)
                         if lrev >= minlink:
                             pending.remove(lrev)
                         else:
                             broken.add(cut)
                         for parent in self.parentrevs(cut):
                             if parent == nullrev:
                                 continue
                             track(parent)
                     return cut, broken
                 def strip(self, minlink, transaction):
                     """Truncate the revlog at the first revision with linkrev >= minlink.

                     Called when revision ``minlink`` and its descendants are being
                     stripped from the repository. Every revision with a linkrev >=
                     minlink has to go, because the matching changelog revisions get
                     renumbered after the strip.

                     The revlog is cut at the first such revision; the caller is
                     trusted to have saved any revision above the cut that should
                     survive and to re-add it afterwards.
                     """
                     total = len(self)
                     if total == 0:
                         return
                     rev, _brokenrevs = self.getstrippoint(minlink)
                     if rev == total:
                         # nothing at or above minlink
                         return
                     # first record the truncation points of the files on disk
                     dataend = self.start(rev)
                     indexend = rev * self._io.size
                     if self._inline:
                         # data lives in the index file, after the index records
                         indexend += dataend
                     else:
                         transaction.add(self.datafile, dataend)
                     transaction.add(self.indexfile, indexend)
                     # then make the in-memory state forget the removed revisions
                     self._cache = None
                     self._chaininfocache = {}
                     self._chunkclear()
                     for stale in xrange(rev, total):
                         del self.nodemap[self.node(stale)]
                     # the last index slot is left in place
                     del self.index[rev:-1]
                 def checksize(self):
                     """Compare the on-disk file sizes with what the index implies.

                     Returns a ``(dd, di)`` tuple:

                     ``dd`` is the size of the data file minus ``self.end()`` of the
                     last revision (0 for an empty revlog).

                     ``di`` is the number of bytes in the index file beyond a whole
                     number of index records. For an inline revlog ``dd`` is 0 and
                     ``di`` is the index file size minus all index records and the
                     summed revision lengths.

                     A file that does not exist contributes a difference of 0. Any
                     other ``IOError`` is propagated; the file handle is closed first.
                     """
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         f = self.opener(self.datafile)
                         try:
                             f.seek(0, os.SEEK_END)
                             actual = f.tell()
                         finally:
                             # never leak the handle, even if seek/tell fails
                             f.close()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         f = self.opener(self.indexfile)
                         try:
                             f.seek(0, os.SEEK_END)
                             actual = f.tell()
                         finally:
                             # never leak the handle, even if seek/tell fails
                             f.close()
                         s = self._io.size
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             # index records and data share one file: subtract both
                             databytes = 0
                             for r in self:
                                 databytes += max(0, self.length(r))
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     """Return the names of the files backing this revlog.

                     The index file always comes first; the data file follows unless
                     the revlog is inline.
                     """
                     if self._inline:
                         return [self.indexfile]
                     return [self.indexfile, self.datafile]
                 # Accepted values for the ``deltareuse`` argument of clone().
                 DELTAREUSEALWAYS = 'always'
                 DELTAREUSESAMEREVS = 'samerevs'
                 DELTAREUSENEVER = 'never'
                 # Every valid policy, used to validate the argument.
                 DELTAREUSEALL = set([
                     DELTAREUSEALWAYS,
                     DELTAREUSESAMEREVS,
                     DELTAREUSENEVER,
                 ])
                 def clone(self, tr, destrevlog, addrevisioncb=None,
                           deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``aggressivemergedeltas`` argument
                     controls whether to compute deltas against both parents for merges.
                     By default, the current default is used.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
                     if len(destrevlog):
                         raise ValueError(_('destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_('source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_('destination revlog has filtered revisions'))
                     # lazydeltabase controls whether to reuse a cached delta, if possible.
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._aggressivemergedeltas
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                         destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd
                         populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                             self.DELTAREUSESAMEREVS)
                         index = self.index
                         for rev in self:
                             entry = index[rev]
                             # Some classes override linkrev to take filtered revs into
                             # account. Use raw entry from index.
                             flags = entry[0] & 0xffff
                             linkrev = entry[4]
                             p1 = index[entry[5]][7]
                             p2 = index[entry[6]][7]
                             node = entry[7]
                             # (Possibly) reuse the delta from the revlog if allowed and
                             # the revlog chunk is a delta.
                             cachedelta = None
                             text = None
                             if populatecachedelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     cachedelta = (dp, str(self._chunk(rev)))
                             if not cachedelta:
                                 text = self.revision(rev)
                             ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                                     checkambig=False)
                             dfh = None
                             if not destrevlog._inline:
                                 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                             try:
                                 destrevlog._addrevision(node, text, tr, linkrev, p1, p2,
                                                         flags, cachedelta, ifh, dfh)
                             finally:
                                 if dfh:
                                     dfh.close()
                                 ifh.close()
                             if addrevisioncb:
                                 addrevisioncb(self, rev, node)
                     finally:
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._aggressivemergedeltas = oldamd