revlog: deprecate the use of `revision(..., raw=True)`...
marmoute
r43111:f75f47b3 default
@@ -1,2678 +1,2682 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

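# Illustrative note (not in the original source): the first field of an
# index entry packs a 48-bit offset together with 16 bits of flags, so
# getoffset()/gettype() simply undo what offset_type() builds, e.g.:
#
#   >>> q = offset_type(16, 0)
#   >>> getoffset(q), gettype(q)
#   (16, 0)
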
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated with the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

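# Illustrative note (not in the original source): the v0 record is a
# fixed 76 bytes (4 * 4-byte ints + 3 * 20-byte nodeids), which is what
# revlogoldio below relies on when slicing the index data:
#
#   >>> indexformatv0.size
#   76
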
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

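    # Illustrative note (not in the original source): after the transform
    # above, every in-memory index entry uses the v1 tuple layout that the
    # accessors in the revlog class assume:
    #
    #   (offset_flags, comp_len, uncomp_len, base_rev,
    #    link_rev, p1_rev, p2_rev, nodeid)
    #
    # v0 does not store an uncompressed length, hence the -1 placeholder.
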
    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

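# Illustrative note (not in the original source): the "ng" record is 64
# bytes (8 + 6 * 4 + 20 + 12 bytes of padding), and the nodeid field is
# padded to 32 bytes to leave room for longer hashes:
#
#   >>> indexformatng.size
#   64
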
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

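# Illustrative note (not in the original source): revision 0 always
# starts at offset 0, so its high offset bytes are free to hold the
# revlog version header; packentry() overwrites the first 4 bytes of
# the rev 0 entry with the version/flags word for exactly that reason.
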
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False,
                 upperboundcomp=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options are provided but no 'revlog*' entry is found, the
            # repository would have no 'requires' file in it, which means we
            # have to stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

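    # Illustrative usage sketch (not in the original source): callers
    # wrap reads in the context manager so an already-open handle can be
    # reused when one is available, e.g.:
    #
    #   with self._datareadfp() as fp:
    #       fp.seek(offset)
    #       data = fp.read(length)
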
    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

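    # Illustrative note (not in the original source): with LFS, for
    # example, the rawtext on disk may be a small pointer blob rather
    # than the file content, so a delta computed against it would not be
    # portable between clients whose rawtexts differ; hence the blanket
    # refusal above when either revision carries such a flag.
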
    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, we are in a fun case
            # where self.index, self.nodemap and self._nodecache are the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

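    # Illustrative note (not in the original source): rev() thus has two
    # miss paths. With the C index, a failed radix-tree lookup surfaces
    # as RevlogError; with the pure-Python dict cache it surfaces as
    # KeyError, and the index is then scanned backwards from _nodepos,
    # memoizing every node seen so later lookups stay cheap.
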
    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

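    # Illustrative sketch (not in the original source): reconstructing a
    # fulltext walks the chain from its base, e.g. if rev 5 deltas
    # against 3 and 3 against 1 (its own base):
    #
    #   >>> rl._deltachain(5)      # hypothetical revlog contents
    #   ([1, 3, 5], False)
    #   >>> rl._deltachain(5, stoprev=3)
    #   ([5], True)
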
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if not r in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

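    # Illustrative note (not in the original source): on a linear history
    # 0..4, findcommonmissing(common=[node(2)], heads=[node(4)]) yields
    # the inclusive ancestor set {nullrev, 0, 1, 2} as 'has' and the
    # missing nodes [node(3), node(4)], i.e. (::heads) - (::common).
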
869 def incrementalmissingrevs(self, common=None):
869 def incrementalmissingrevs(self, common=None):
870 """Return an object that can be used to incrementally compute the
870 """Return an object that can be used to incrementally compute the
871 revision numbers of the ancestors of arbitrary sets that are not
871 revision numbers of the ancestors of arbitrary sets that are not
872 ancestors of common. This is an ancestor.incrementalmissingancestors
872 ancestors of common. This is an ancestor.incrementalmissingancestors
873 object.
873 object.
874
874
875 'common' is a list of revision numbers. If common is not supplied, uses
875 'common' is a list of revision numbers. If common is not supplied, uses
876 nullrev.
876 nullrev.
877 """
877 """
878 if common is None:
878 if common is None:
879 common = [nullrev]
879 common = [nullrev]
880
880
881 if rustancestor is not None:
881 if rustancestor is not None:
882 return rustancestor.MissingAncestors(self.index, common)
882 return rustancestor.MissingAncestors(self.index, common)
883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
883 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884
884
885 def findmissingrevs(self, common=None, heads=None):
885 def findmissingrevs(self, common=None, heads=None):
886 """Return the revision numbers of the ancestors of heads that
886 """Return the revision numbers of the ancestors of heads that
887 are not ancestors of common.
887 are not ancestors of common.
888
888
889 More specifically, return a list of revision numbers corresponding to
889 More specifically, return a list of revision numbers corresponding to
890 nodes N such that every N satisfies the following constraints:
890 nodes N such that every N satisfies the following constraints:
891
891
892 1. N is an ancestor of some node in 'heads'
892 1. N is an ancestor of some node in 'heads'
893 2. N is not an ancestor of any node in 'common'
893 2. N is not an ancestor of any node in 'common'
894
894
895 The list is sorted by revision number, meaning it is
895 The list is sorted by revision number, meaning it is
896 topologically sorted.
896 topologically sorted.
897
897
898 'heads' and 'common' are both lists of revision numbers. If heads is
898 'heads' and 'common' are both lists of revision numbers. If heads is
899 not supplied, uses all of the revlog's heads. If common is not
899 not supplied, uses all of the revlog's heads. If common is not
900 supplied, uses nullid."""
900 supplied, uses nullid."""
901 if common is None:
901 if common is None:
902 common = [nullrev]
902 common = [nullrev]
903 if heads is None:
903 if heads is None:
904 heads = self.headrevs()
904 heads = self.headrevs()
905
905
906 inc = self.incrementalmissingrevs(common=common)
906 inc = self.incrementalmissingrevs(common=common)
907 return inc.missingancestors(heads)
907 return inc.missingancestors(heads)
908
908
909 def findmissing(self, common=None, heads=None):
909 def findmissing(self, common=None, heads=None):
910 """Return the ancestors of heads that are not ancestors of common.
910 """Return the ancestors of heads that are not ancestors of common.
911
911
912 More specifically, return a list of nodes N such that every N
912 More specifically, return a list of nodes N such that every N
913 satisfies the following constraints:
913 satisfies the following constraints:
914
914
915 1. N is an ancestor of some node in 'heads'
915 1. N is an ancestor of some node in 'heads'
916 2. N is not an ancestor of any node in 'common'
916 2. N is not an ancestor of any node in 'common'
917
917
918 The list is sorted by revision number, meaning it is
918 The list is sorted by revision number, meaning it is
919 topologically sorted.
919 topologically sorted.
920
920
921 'heads' and 'common' are both lists of node IDs. If heads is
921 'heads' and 'common' are both lists of node IDs. If heads is
922 not supplied, uses all of the revlog's heads. If common is not
922 not supplied, uses all of the revlog's heads. If common is not
923 supplied, uses nullid."""
923 supplied, uses nullid."""
924 if common is None:
924 if common is None:
925 common = [nullid]
925 common = [nullid]
926 if heads is None:
926 if heads is None:
927 heads = self.heads()
927 heads = self.heads()
928
928
929 common = [self.rev(n) for n in common]
929 common = [self.rev(n) for n in common]
930 heads = [self.rev(n) for n in heads]
930 heads = [self.rev(n) for n in heads]
931
931
932 inc = self.incrementalmissingrevs(common=common)
932 inc = self.incrementalmissingrevs(common=common)
933 return [self.node(r) for r in inc.missingancestors(heads)]
933 return [self.node(r) for r in inc.missingancestors(heads)]
934
934
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

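    # Illustrative sketch (not part of the module): assuming `rl` is an
    # already-open revlog and `a`, `b` are binary node ids with `b`
    # descending from `a`, the nodes on every path between them could be
    # listed as:
    #
    #     nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #     # `nodes` is topologically sorted and includes both `a` and `b`.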
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

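    # Illustrative sketch (not part of the module): assuming `rl` is an
    # already-open revlog, the childless nodes descending from a given
    # node `n` could be obtained with:
    #
    #     tips = rl.heads(start=n)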
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

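    # Illustrative note (not part of the module): children are always stored
    # after their parents, which is why the scan above only looks at
    # revisions greater than rev(node). For any node `n` in a revlog `rl`:
    #
    #     assert all(rl.rev(c) > rl.rev(n) for c in rl.children(n))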
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

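    # Illustrative note (not part of the module): revision numbers form a
    # topological order, so an ancestor always has a number lower than or
    # equal to its descendant; that is what justifies the `a > b` early
    # return above. For any open revlog `rl`:
    #
    #     assert not rl.isancestorrev(3, 2)  # 3 > 2, cannot be an ancestor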
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(minroot, heads, roots,
                                              includepath)
        except AttributeError:
            return dagop._reachablerootspure(self.parentrevs,
                                             minroot, roots, heads,
                                             includepath)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid
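
    # Illustrative sketch (not part of the module): with a criss-cross
    # history there can be several equally "best" common ancestors; the
    # min() above merely makes the choice deterministic. Assuming `node_a`
    # and `node_b` are nodes stored in revlog `rl`:
    #
    #     base = rl.ancestor(node_a, node_b)
    #     assert base == nullid or rl.isancestor(base, node_a)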
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

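    # Illustrative sketch (not part of the module): `lookup` accepts several
    # identifier shapes and tries the exact forms before prefix matching.
    # Assuming `rl` is an open revlog and `node` a stored 20-byte node:
    #
    #     rl.lookup(0)            # revision number
    #     rl.lookup(b'0')         # str(revision number)
    #     rl.lookup(node)         # binary node
    #     rl.lookup(b'1de4523f')  # unambiguous hex prefix
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError; no match at
    # all raises LookupError.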
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

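    # Illustrative sketch (not part of the module): `shortest` is the
    # inverse convenience of prefix lookup. Assuming `node` is stored in
    # revlog `rl`:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl.lookup(prefix) == node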
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

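    # Illustrative note (not part of the module): with a power-of-two
    # cachesize, `offset & ~(cachesize - 1)` rounds the start down to a
    # window boundary and the reallength expression rounds the end up, so
    # every read covers whole cache-aligned windows. For cachesize = 65536:
    #
    #     offset, length = 70000, 1000
    #     realoffset = 70000 & ~65535                              # 65536
    #     reallength = ((70000 + 1000 + 65536) & ~65535) - 65536   # 65536
    #     # the window [65536, 131072) contains [70000, 71000)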
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev,
                                                       df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

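    # Illustrative note (not part of the module): in the classic (non
    # generaldelta) layout a revision always deltas against rev - 1, while
    # generaldelta stores the delta base explicitly in the index. Either
    # way the base precedes the revision:
    #
    #     base = rl.deltaparent(rev)
    #     assert base == nullrev or base < rev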
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written
        to revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1),
                              self.rawdata(rev2))

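    # Illustrative sketch (not part of the module): when rev2 is already
    # stored as a delta against rev1, revdiff can return the stored chunk
    # verbatim; otherwise it recomputes a binary diff of the raw texts.
    # Applying the delta should reconstruct rev2's raw text:
    #
    #     delta = rl.revdiff(rev, rev + 1)
    #     assert mdiff.patch(rl.rawdata(rev), delta) == rl.rawdata(rev + 1)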
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = ('revlog.revision(..., raw=True) is deprecated, '
                   'use revlog.rawdata(...)')
            util.nouideprecwarn(msg, '5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

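    # Illustrative sketch (not part of the module): callers that used the
    # now-deprecated form to fetch the stored bytes should switch to
    # rawdata(), which returns the same thing without the warning:
    #
    #     text = rl.rawdata(rev)   # instead of rl.revision(rev, raw=True)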
    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return ""

        # The text as stored inside the revlog. Might be the revision or
        # might need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        text, validatehash = self._processflags(rawtext, flags, 'read',
                                                raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return an uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should
        be applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags
        defined in REVIDX_FLAGS_ORDER needs to be stable to allow
        non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~flagutil.REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~flagutil.REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else:  # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

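    # Illustrative sketch (not part of the module): a flag processor is a
    # (readtransform, writetransform, rawtransform) triple, matching the
    # unpacking above. A hypothetical processor that stores text reversed
    # on disk might look like:
    #
    #     def _read(self, text):
    #         return text[::-1], True   # text for callers, validate hash
    #     def _write(self, text):
    #         return text[::-1], False  # text as stored on disk
    #     def _raw(self, rawtext):
    #         return False              # rawtext hash is not checkable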
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

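    # For reference: the default self.hash() is the historical revlog hash
    # (storageutil.hashrevisionsha1), i.e. sha1(min(p1, p2) + max(p1, p2) +
    # text), so a node commits to both parents as well as the text being
    # checked above.
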
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

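    # Layout reminder (a sketch of existing behavior, not new logic): an
    # inline revlog interleaves each fixed-size index entry with its data
    # chunk in the .i file:
    #
    #   [entry 0][data 0][entry 1][data 1]...          (.i only, small revlogs)
    #
    # while a split revlog keeps the entries in .i and all chunks in .d:
    #
    #   [entry 0][entry 1]...  +  [data 0][data 1]...  (.i + .d)
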
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

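    # Minimal usage sketch (hypothetical names: `rl` is a revlog instance,
    # `tr` an open transaction, `linkrev`/`p1node`/`p2node` supplied by the
    # caller; not part of this module):
    #
    #   node = rl.addrevision(b'file content\n', tr, linkrev, p1node, p2node)
    #   assert rl.revision(node) == b'file content\n'
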
    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

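    # Header convention shared by compress()/decompress(): the first byte of
    # a stored chunk routes decoding. 'x' is zlib (a zlib stream begins with
    # 0x78, i.e. 'x'), 'u' prefixes data kept uncompressed, '\0' marks raw
    # data stored as-is (safe because it cannot collide with the other
    # headers), and any other byte is looked up as a compression engine
    # revlog header.
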
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

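    # Field order of the index entry tuple appended above:
    #   (offset_type(offset, flags), compressed length, rawtext length,
    #    delta base rev, linkrev, p1 rev, p2 rev, node)
    # offset_type() packs the data offset (shifted left 16 bits) and the
    # 16-bit flags into a single integer, matching the on-disk v1 index
    # layout.
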
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

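    # `data` above is the 2-tuple produced by compress(): (header, payload),
    # where the header is b'' or b'u'. Writing data[0] then data[1]
    # reproduces the exact on-disk chunk that decompress() later routes on.
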
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes

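    # Each element yielded by ``deltas`` above is a 7-tuple, as unpacked in
    # the loop: (node, p1, p2, linknode, deltabase, delta, flags). This is
    # the shape the changegroup code produces when applying a bundle.
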
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

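    # Truncation offsets, spelled out: for a split revlog the data file is
    # cut at start(rev) and the index at rev * entrysize; for an inline
    # revlog entries and data share the index file, so the cut point is
    # start(rev) + rev * entrysize.
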
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If not set, the destination revlog's current setting is
        used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if (deltareuse != self.DELTAREUSEFULLADD
                        and destrevlog._lazydelta):
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

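    # Minimal usage sketch (hypothetical names; `src` and `dst` are revlog
    # instances backed by the same opener and `tr` is an open transaction):
    #
    #   dst = revlog(opener, b'data/file.i.new')
    #   src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # DELTAREUSENEVER forces every delta to be recomputed under the
    # destination's own delta policy, which is what re-delta upgrade
    # operations rely on.
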
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

2551 def verifyintegrity(self, state):
2555 def verifyintegrity(self, state):
2552 """Verifies the integrity of the revlog.
2556 """Verifies the integrity of the revlog.
2553
2557
2554 Yields ``revlogproblem`` instances describing problems that are
2558 Yields ``revlogproblem`` instances describing problems that are
2555 found.
2559 found.
2556 """
2560 """
2557 dd, di = self.checksize()
2561 dd, di = self.checksize()
2558 if dd:
2562 if dd:
2559 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2563 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2560 if di:
2564 if di:
2561 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2565 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2562
2566
2563 version = self.version & 0xFFFF
2567 version = self.version & 0xFFFF
2564
2568
2565 # The verifier tells us what version revlog we should be.
2569 # The verifier tells us what version revlog we should be.
2566 if version != state['expectedversion']:
2570 if version != state['expectedversion']:
2567 yield revlogproblem(
2571 yield revlogproblem(
2568 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2572 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2569 (self.indexfile, version, state['expectedversion']))
2573 (self.indexfile, version, state['expectedversion']))
2570
2574
2571 state['skipread'] = set()
2575 state['skipread'] = set()
2572
2576
2573 for rev in self:
2577 for rev in self:
2574 node = self.node(rev)
2578 node = self.node(rev)
2575
2579
2576 # Verify contents. 4 cases to care about:
2580 # Verify contents. 4 cases to care about:
2577 #
2581 #
2578 # common: the most common case
2582 # common: the most common case
2579 # rename: with a rename
2583 # rename: with a rename
2580 # meta: file content starts with b'\1\n', the metadata
2584 # meta: file content starts with b'\1\n', the metadata
2581 # header defined in filelog.py, but without a rename
2585 # header defined in filelog.py, but without a rename
2582 # ext: content stored externally
2586 # ext: content stored externally
2583 #
2587 #
2584 # More formally, their differences are shown below:
2588 # More formally, their differences are shown below:
2585 #
2589 #
2586 # | common | rename | meta | ext
2590 # | common | rename | meta | ext
2587 # -------------------------------------------------------
2591 # -------------------------------------------------------
2588 # flags() | 0 | 0 | 0 | not 0
2592 # flags() | 0 | 0 | 0 | not 0
2589 # renamed() | False | True | False | ?
2593 # renamed() | False | True | False | ?
2590 # rawtext[0:2]=='\1\n'| False | True | True | ?
2594 # rawtext[0:2]=='\1\n'| False | True | True | ?
2591 #
2595 #
2592 # "rawtext" means the raw text stored in revlog data, which
2596 # "rawtext" means the raw text stored in revlog data, which
2593 # could be retrieved by "rawdata(rev)". "text"
2597 # could be retrieved by "rawdata(rev)". "text"
2594 # mentioned below is "revision(rev)".
2598 # mentioned below is "revision(rev)".
2595 #
2599 #
2596 # There are 3 different lengths stored physically:
2600 # There are 3 different lengths stored physically:
2597 # 1. L1: rawsize, stored in revlog index
2601 # 1. L1: rawsize, stored in revlog index
2598 # 2. L2: len(rawtext), stored in revlog data
2602 # 2. L2: len(rawtext), stored in revlog data
2599 # 3. L3: len(text), stored in revlog data if flags==0, or
2603 # 3. L3: len(text), stored in revlog data if flags==0, or
2600 # possibly somewhere else if flags!=0
2604 # possibly somewhere else if flags!=0
2601 #
2605 #
2602 # L1 should be equal to L2. L3 could be different from them.
2606 # L1 should be equal to L2. L3 could be different from them.
2603 # "text" may or may not affect commit hash depending on flag
2607 # "text" may or may not affect commit hash depending on flag
2604 # processors (see flagutil.addflagprocessor).
2608 # processors (see flagutil.addflagprocessor).
2605 #
2609 #
2606 # | common | rename | meta | ext
2610 # | common | rename | meta | ext
2607 # -------------------------------------------------
2611 # -------------------------------------------------
2608 # rawsize() | L1 | L1 | L1 | L1
2612 # rawsize() | L1 | L1 | L1 | L1
2609 # size() | L1 | L2-LM | L1(*) | L1 (?)
2613 # size() | L1 | L2-LM | L1(*) | L1 (?)
2610 # len(rawtext) | L2 | L2 | L2 | L2
2614 # len(rawtext) | L2 | L2 | L2 | L2
2611 # len(text) | L2 | L2 | L2 | L3
2615 # len(text) | L2 | L2 | L2 | L3
2612 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2616 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2613 #
2617 #
2614 # LM: length of metadata, depending on rawtext
2618 # LM: length of metadata, depending on rawtext
2615 # (*): not ideal, see comment in filelog.size
2619 # (*): not ideal, see comment in filelog.size
2616 # (?): could be "- len(meta)" if the resolved content has
2620 # (?): could be "- len(meta)" if the resolved content has
2617 # rename metadata
2621 # rename metadata
2618 #
2622 #
2619 # Checks needed to be done:
2623 # Checks needed to be done:
2620 # 1. length check: L1 == L2, in all cases.
2624 # 1. length check: L1 == L2, in all cases.
2621 # 2. hash check: depending on flag processor, we may need to
2625 # 2. hash check: depending on flag processor, we may need to
2622 # use either "text" (external), or "rawtext" (in revlog).
2626 # use either "text" (external), or "rawtext" (in revlog).
2623
2627
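            # Illustrative example (an assumption added for clarity,
            # not part of the original source): a rawtext such as
            #   b'\x01\ncopy: a\ncopyrev: <node hex>\n\x01\nfile body'
            # starts with the b'\1\n' metadata header (the "rename"
            # case above), so len(read()) is len(rawtext) minus LM,
            # while rawsize() and len(rawtext) both report the full
            # stored length, L1 == L2.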
            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)
                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
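                # Any other failure to resolve the revision (e.g. a
                # corrupt delta chain) is reported as a problem rather
                # than aborting the whole verification run.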
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
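            # Inline revlogs interleave revision data into the index
            # file, so only non-inline revlogs have a separate data
            # file to report.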
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))
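        # trackedsize above totals the uncompressed length of every
        # revision, while storedsize reports the actual on-disk
        # footprint of the revlog files.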
        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
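The sketch below shows how a caller might drive these two methods. It
is a minimal illustration under stated assumptions, not part of this
changeset: the store path, the revlog file name, the expected version,
and the reporting logic are invented for the example; only the 'state'
keys and the storageinfo() keywords mirror the code above.

    from mercurial import revlog as revlogmod, vfs as vfsmod

    # Open an existing revlog directly (path and file name assumed).
    opener = vfsmod.vfs(b'.hg/store')
    rl = revlogmod.revlog(opener, b'00changelog.i')

    # Drive the integrity generator; verify normally supplies 'state'.
    state = {'expectedversion': 1,  # REVLOGV1, an assumption here
             'erroroncensored': True}
    for problem in rl.verifyintegrity(state):
        # revlogproblem carries optional warning/error/node attributes;
        # unreadable nodes also land in state['skipread'].
        print(problem.warning or problem.error)

    # Aggregate storage details, computed without resolving revisions.
    info = rl.storageinfo(revisionscount=True, trackedsize=True,
                          storedsize=True)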