##// END OF EJS Templates
revlog: add some documentation to the `checksize` method...
marmoute -
r42038:3f3f9624 default
parent child Browse files
Show More
@@ -1,2662 +1,2670 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
# Blanked usage of all the names to prevent pyflakes "unused import"
# complaints. We need these names available in the module namespace for
# extensions that do `from mercurial.revlog import REVLOGV1` etc.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

# C extension (or pure-Python fallback) index parser, chosen by policy.
parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}
117
117
118 # Flag processors for REVIDX_ELLIPSIS.
118 # Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    """'read' flag processor hook for REVIDX_ELLIPSIS revisions.

    Returns the stored text unchanged; the False indicates the result is
    not suitable for hash integrity checking.
    """
    return (text, False)
121
121
def ellipsiswriteprocessor(rl, text):
    """'write' flag processor hook for REVIDX_ELLIPSIS revisions.

    Stores the text as-is; the False indicates the rawtext is not
    suitable for hash integrity checking.
    """
    return (text, False)
124
124
def ellipsisrawprocessor(rl, text):
    """'raw' flag processor hook for REVIDX_ELLIPSIS revisions.

    Always False: ellipsis rawtext cannot be used for hash integrity
    checks.
    """
    return False
127
127
# The (read, write, raw) flag processor triple registered for
# REVIDX_ELLIPSIS (see addflagprocessor() for the triple's contract).
ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)
133
133
def addflagprocessor(flag, processor):
    """Register ``processor`` for revisions carrying ``flag``.

    Invariants:
    - ``flag`` must appear in both REVIDX_KNOWN_FLAGS and
      REVIDX_FLAGS_ORDER, and in REVIDX_RAWTEXT_CHANGING_FLAGS when the
      processor may alter rawtext.
    - At most one processor may be registered per flag.
    - ``processor`` is a 3-tuple of functions (read, write, raw):
        - read(self, rawtext)  -> (text, bool)
        - write(self, text)    -> (rawtext, bool)
        - raw(self, rawtext)   -> bool
      "text" is what users see; "rawtext" is what is stored in the
      revlog. The boolean tells whether the returned text may be used
      for hash integrity checking: if "write" returns False the "text"
      is hashed, if True the returned "rawtext" is hashed. "read" and
      "write" usually return opposite booleans, while "raw" returns the
      same boolean as "write".

    The 'raw' transform is used by changegroup generation and some
    debug commands, where it only reports whether the contents can be
    used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)
160
160
def _insertflagprocessor(flag, processor, flagprocessors):
    """Validate ``flag`` and store ``processor`` in ``flagprocessors``.

    Raises ProgrammingError for flags missing from REVIDX_KNOWN_FLAGS or
    REVIDX_FLAGS_ORDER, and Abort when the flag already has a processor.
    """
    if not flag & REVIDX_KNOWN_FLAGS:
        message = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(message)
    if flag not in REVIDX_FLAGS_ORDER:
        message = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(message)
    if flag in flagprocessors:
        message = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(message)
    flagprocessors[flag] = processor
172
172
def getoffset(q):
    """Extract the data-file offset from a packed offset/flags value.

    The high bits (above the low 16 flag bits) hold the offset.
    """
    return int(q >> 16)
175
175
def gettype(q):
    """Extract the flag bits from a packed offset/flags value.

    Flags occupy the low 16 bits.
    """
    return int(q & 0xFFFF)
178
178
def offset_type(offset, type):
    """Pack a data-file offset and flag bits into a single index value.

    Raises ValueError when ``type`` carries bits outside
    REVIDX_KNOWN_FLAGS.
    """
    if type & ~REVIDX_KNOWN_FLAGS:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)
183
183
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the revision fulltext (presumably in bytes —
                confirm against callers)
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
202
202
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    """A revision delta produced by revlog storage.

    Implements repository.irevisiondelta; see that interface for the
    semantics of each attribute. ``linknode`` is optional and defaults
    to None.
    """
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)
215
215
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    """A problem reported during revlog verification.

    Implements repository.iverifyproblem. ``warning``, ``error`` and
    ``node`` all default to None.
    """
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
222
222
# On-disk layout of a version 0 index entry:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
# Bound pack/unpack methods aliased at module level (cheaper lookup).
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack
234
234
class revlogoldindex(list):
    """In-memory index for version 0 revlogs.

    A plain list of entry tuples, except that index -1 yields a
    synthetic null-revision entry rather than the last element.
    """
    def __getitem__(self, i):
        if i != -1:
            return list.__getitem__(self, i)
        # null revision: zero offset/lengths, no parents, nullid hash
        return (0, 0, 0, -1, -1, -1, -1, nullid)
240
240
class revlogoldio(object):
    """Reader/writer for the version 0 revlog index format."""

    def __init__(self):
        # fixed on-disk size of one v0 index entry
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        """Parse raw v0 index bytes into (index, nodemap, None)."""
        entrysize = self.size
        index = []
        nodemap = {nullid: nullrev}
        count = 0
        for start in range(0, len(data) - entrysize + 1, entrysize):
            e = indexformatv0_unpack(data[start:start + entrysize])
            # transform to revlogv1 format
            converted = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                         nodemap.get(e[4], nullrev),
                         nodemap.get(e[5], nullrev), e[6])
            index.append(converted)
            nodemap[e[6]] = count
            count += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        """Serialize a v1-layout index tuple back to v0 bytes.

        ``node`` maps a rev to its nodeid (v0 stores parent nodeids,
        not parent revs). Raises RevlogError if the entry carries
        flags, which v0 cannot represent.
        """
        if gettype(entry[0]):
            raise error.RevlogError(
                _('index entry flags need revlog version 1'))
        fields = (getoffset(entry[0]), entry[1], entry[3], entry[4],
                  node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*fields)
271
271
# On-disk layout of a "ng" (v1/v2) index entry:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
# 4-byte header holding the revlog version+flags; it overlays the first
# 4 bytes of entry 0 (see revlogio.packentry).
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff
291
291
class revlogio(object):
    """Reader/writer for the "ng" (v1/v2) revlog index format."""

    def __init__(self):
        # fixed on-disk size of one ng index entry
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        """Parse raw index bytes via the C parser.

        Returns (index, nodemap-or-None, chunkcache).
        """
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        """Serialize an index tuple; entry 0 doubles as the file header."""
        packed = indexformatng_pack(*entry)
        if rev == 0:
            packed = versionformat_pack(version) + packed[4:]
        return packed
306
306
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        indexfile is the path of the index; when datafile is not given,
        it is derived by replacing the index suffix with ".d".

        Most attributes set here are defaults that _loadindex() may
        override from the opener's options.
        """
        self.indexfile = indexfile
        # NOTE: assumes indexfile ends in a 2-character suffix (".i")
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        # sparse-read density threshold and minimum gap size defaults
        # (overridable via opener options in _loadindex)
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(_flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        # Must run last: reads opener options and the on-disk index, and
        # may update several of the defaults set above.
        self._loadindex()
388
388
    def _loadindex(self):
        """Read configuration from the opener and load the on-disk index.

        Determines the revlog version/flags (from the existing index
        header, or from opener options for a new revlog), applies tuning
        options, parses the index data and initializes the caches.

        Raises RevlogError for invalid configuration, unknown
        version/flags, or a corrupted index.
        """
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        # Pick the version+flags to use if this revlog is created empty.
        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        # Tuning knobs, all optional.
        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        # chunk cache size must be a positive power of two
        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)

        # Read the raw index; a missing file means a brand new revlog.
        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                # version+flags live in the first 4 bytes of the index
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        # split the combined value into flag bits and format number
        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

        if fmt == REVLOGV0:
            # v0 supports no flags at all
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        # select the index codec matching the on-disk format
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}
526
526
527 @util.propertycache
527 @util.propertycache
528 def _compressor(self):
528 def _compressor(self):
529 return util.compengines[self._compengine].revlogcompressor()
529 return util.compengines[self._compengine].revlogcompressor()
530
530
531 def _indexfp(self, mode='r'):
531 def _indexfp(self, mode='r'):
532 """file object for the revlog's index file"""
532 """file object for the revlog's index file"""
533 args = {r'mode': mode}
533 args = {r'mode': mode}
534 if mode != 'r':
534 if mode != 'r':
535 args[r'checkambig'] = self._checkambig
535 args[r'checkambig'] = self._checkambig
536 if mode == 'w':
536 if mode == 'w':
537 args[r'atomictemp'] = True
537 args[r'atomictemp'] = True
538 return self.opener(self.indexfile, **args)
538 return self.opener(self.indexfile, **args)
539
539
540 def _datafp(self, mode='r'):
540 def _datafp(self, mode='r'):
541 """file object for the revlog's data file"""
541 """file object for the revlog's data file"""
542 return self.opener(self.datafile, mode=mode)
542 return self.opener(self.datafile, mode=mode)
543
543
544 @contextlib.contextmanager
544 @contextlib.contextmanager
545 def _datareadfp(self, existingfp=None):
545 def _datareadfp(self, existingfp=None):
546 """file object suitable to read data"""
546 """file object suitable to read data"""
547 # Use explicit file handle, if given.
547 # Use explicit file handle, if given.
548 if existingfp is not None:
548 if existingfp is not None:
549 yield existingfp
549 yield existingfp
550
550
551 # Use a file handle being actively used for writes, if available.
551 # Use a file handle being actively used for writes, if available.
552 # There is some danger to doing this because reads will seek the
552 # There is some danger to doing this because reads will seek the
553 # file. However, _writeentry() performs a SEEK_END before all writes,
553 # file. However, _writeentry() performs a SEEK_END before all writes,
554 # so we should be safe.
554 # so we should be safe.
555 elif self._writinghandles:
555 elif self._writinghandles:
556 if self._inline:
556 if self._inline:
557 yield self._writinghandles[0]
557 yield self._writinghandles[0]
558 else:
558 else:
559 yield self._writinghandles[1]
559 yield self._writinghandles[1]
560
560
561 # Otherwise open a new file handle.
561 # Otherwise open a new file handle.
562 else:
562 else:
563 if self._inline:
563 if self._inline:
564 func = self._indexfp
564 func = self._indexfp
565 else:
565 else:
566 func = self._datafp
566 func = self._datafp
567 with func() as fp:
567 with func() as fp:
568 yield fp
568 yield fp
569
569
570 def tip(self):
570 def tip(self):
571 return self.node(len(self.index) - 1)
571 return self.node(len(self.index) - 1)
572 def __contains__(self, rev):
572 def __contains__(self, rev):
573 return 0 <= rev < len(self)
573 return 0 <= rev < len(self)
574 def __len__(self):
574 def __len__(self):
575 return len(self.index)
575 return len(self.index)
576 def __iter__(self):
576 def __iter__(self):
577 return iter(pycompat.xrange(len(self)))
577 return iter(pycompat.xrange(len(self)))
578 def revs(self, start=0, stop=None):
578 def revs(self, start=0, stop=None):
579 """iterate over all rev in this revlog (from start to stop)"""
579 """iterate over all rev in this revlog (from start to stop)"""
580 return storageutil.iterrevs(len(self), start=start, stop=stop)
580 return storageutil.iterrevs(len(self), start=start, stop=stop)
581
581
582 @util.propertycache
582 @util.propertycache
583 def nodemap(self):
583 def nodemap(self):
584 if self.index:
584 if self.index:
585 # populate mapping down to the initial node
585 # populate mapping down to the initial node
586 node0 = self.index[0][7] # get around changelog filtering
586 node0 = self.index[0][7] # get around changelog filtering
587 self.rev(node0)
587 self.rev(node0)
588 return self._nodecache
588 return self._nodecache
589
589
590 def hasnode(self, node):
590 def hasnode(self, node):
591 try:
591 try:
592 self.rev(node)
592 self.rev(node)
593 return True
593 return True
594 except KeyError:
594 except KeyError:
595 return False
595 return False
596
596
597 def candelta(self, baserev, rev):
597 def candelta(self, baserev, rev):
598 """whether two revisions (baserev, rev) can be delta-ed or not"""
598 """whether two revisions (baserev, rev) can be delta-ed or not"""
599 # Disable delta if either rev requires a content-changing flag
599 # Disable delta if either rev requires a content-changing flag
600 # processor (ex. LFS). This is because such flag processor can alter
600 # processor (ex. LFS). This is because such flag processor can alter
601 # the rawtext content that the delta will be based on, and two clients
601 # the rawtext content that the delta will be based on, and two clients
602 # could have a same revlog node with different flags (i.e. different
602 # could have a same revlog node with different flags (i.e. different
603 # rawtext contents) and the delta could be incompatible.
603 # rawtext contents) and the delta could be incompatible.
604 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
604 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
605 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
605 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
606 return False
606 return False
607 return True
607 return True
608
608
609 def clearcaches(self):
609 def clearcaches(self):
610 self._revisioncache = None
610 self._revisioncache = None
611 self._chainbasecache.clear()
611 self._chainbasecache.clear()
612 self._chunkcache = (0, '')
612 self._chunkcache = (0, '')
613 self._pcache = {}
613 self._pcache = {}
614
614
615 try:
615 try:
616 # If we are using the native C version, you are in a fun case
616 # If we are using the native C version, you are in a fun case
617 # where self.index, self.nodemap and self._nodecaches is the same
617 # where self.index, self.nodemap and self._nodecaches is the same
618 # object.
618 # object.
619 self._nodecache.clearcaches()
619 self._nodecache.clearcaches()
620 except AttributeError:
620 except AttributeError:
621 self._nodecache = {nullid: nullrev}
621 self._nodecache = {nullid: nullrev}
622 self._nodepos = None
622 self._nodepos = None
623
623
624 def rev(self, node):
624 def rev(self, node):
625 try:
625 try:
626 return self._nodecache[node]
626 return self._nodecache[node]
627 except TypeError:
627 except TypeError:
628 raise
628 raise
629 except error.RevlogError:
629 except error.RevlogError:
630 # parsers.c radix tree lookup failed
630 # parsers.c radix tree lookup failed
631 if node == wdirid or node in wdirfilenodeids:
631 if node == wdirid or node in wdirfilenodeids:
632 raise error.WdirUnsupported
632 raise error.WdirUnsupported
633 raise error.LookupError(node, self.indexfile, _('no node'))
633 raise error.LookupError(node, self.indexfile, _('no node'))
634 except KeyError:
634 except KeyError:
635 # pure python cache lookup failed
635 # pure python cache lookup failed
636 n = self._nodecache
636 n = self._nodecache
637 i = self.index
637 i = self.index
638 p = self._nodepos
638 p = self._nodepos
639 if p is None:
639 if p is None:
640 p = len(i) - 1
640 p = len(i) - 1
641 else:
641 else:
642 assert p < len(i)
642 assert p < len(i)
643 for r in pycompat.xrange(p, -1, -1):
643 for r in pycompat.xrange(p, -1, -1):
644 v = i[r][7]
644 v = i[r][7]
645 n[v] = r
645 n[v] = r
646 if v == node:
646 if v == node:
647 self._nodepos = r - 1
647 self._nodepos = r - 1
648 return r
648 return r
649 if node == wdirid or node in wdirfilenodeids:
649 if node == wdirid or node in wdirfilenodeids:
650 raise error.WdirUnsupported
650 raise error.WdirUnsupported
651 raise error.LookupError(node, self.indexfile, _('no node'))
651 raise error.LookupError(node, self.indexfile, _('no node'))
652
652
653 # Accessors for index entries.
653 # Accessors for index entries.
654
654
655 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
655 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
656 # are flags.
656 # are flags.
657 def start(self, rev):
657 def start(self, rev):
658 return int(self.index[rev][0] >> 16)
658 return int(self.index[rev][0] >> 16)
659
659
660 def flags(self, rev):
660 def flags(self, rev):
661 return self.index[rev][0] & 0xFFFF
661 return self.index[rev][0] & 0xFFFF
662
662
663 def length(self, rev):
663 def length(self, rev):
664 return self.index[rev][1]
664 return self.index[rev][1]
665
665
666 def rawsize(self, rev):
666 def rawsize(self, rev):
667 """return the length of the uncompressed text for a given revision"""
667 """return the length of the uncompressed text for a given revision"""
668 l = self.index[rev][2]
668 l = self.index[rev][2]
669 if l >= 0:
669 if l >= 0:
670 return l
670 return l
671
671
672 t = self.revision(rev, raw=True)
672 t = self.revision(rev, raw=True)
673 return len(t)
673 return len(t)
674
674
675 def size(self, rev):
675 def size(self, rev):
676 """length of non-raw text (processed by a "read" flag processor)"""
676 """length of non-raw text (processed by a "read" flag processor)"""
677 # fast path: if no "read" flag processor could change the content,
677 # fast path: if no "read" flag processor could change the content,
678 # size is rawsize. note: ELLIPSIS is known to not change the content.
678 # size is rawsize. note: ELLIPSIS is known to not change the content.
679 flags = self.flags(rev)
679 flags = self.flags(rev)
680 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
680 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
681 return self.rawsize(rev)
681 return self.rawsize(rev)
682
682
683 return len(self.revision(rev, raw=False))
683 return len(self.revision(rev, raw=False))
684
684
685 def chainbase(self, rev):
685 def chainbase(self, rev):
686 base = self._chainbasecache.get(rev)
686 base = self._chainbasecache.get(rev)
687 if base is not None:
687 if base is not None:
688 return base
688 return base
689
689
690 index = self.index
690 index = self.index
691 iterrev = rev
691 iterrev = rev
692 base = index[iterrev][3]
692 base = index[iterrev][3]
693 while base != iterrev:
693 while base != iterrev:
694 iterrev = base
694 iterrev = base
695 base = index[iterrev][3]
695 base = index[iterrev][3]
696
696
697 self._chainbasecache[rev] = base
697 self._chainbasecache[rev] = base
698 return base
698 return base
699
699
700 def linkrev(self, rev):
700 def linkrev(self, rev):
701 return self.index[rev][4]
701 return self.index[rev][4]
702
702
703 def parentrevs(self, rev):
703 def parentrevs(self, rev):
704 try:
704 try:
705 entry = self.index[rev]
705 entry = self.index[rev]
706 except IndexError:
706 except IndexError:
707 if rev == wdirrev:
707 if rev == wdirrev:
708 raise error.WdirUnsupported
708 raise error.WdirUnsupported
709 raise
709 raise
710
710
711 return entry[5], entry[6]
711 return entry[5], entry[6]
712
712
713 # fast parentrevs(rev) where rev isn't filtered
713 # fast parentrevs(rev) where rev isn't filtered
714 _uncheckedparentrevs = parentrevs
714 _uncheckedparentrevs = parentrevs
715
715
716 def node(self, rev):
716 def node(self, rev):
717 try:
717 try:
718 return self.index[rev][7]
718 return self.index[rev][7]
719 except IndexError:
719 except IndexError:
720 if rev == wdirrev:
720 if rev == wdirrev:
721 raise error.WdirUnsupported
721 raise error.WdirUnsupported
722 raise
722 raise
723
723
724 # Derived from index values.
724 # Derived from index values.
725
725
726 def end(self, rev):
726 def end(self, rev):
727 return self.start(rev) + self.length(rev)
727 return self.start(rev) + self.length(rev)
728
728
729 def parents(self, node):
729 def parents(self, node):
730 i = self.index
730 i = self.index
731 d = i[self.rev(node)]
731 d = i[self.rev(node)]
732 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
732 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
733
733
734 def chainlen(self, rev):
734 def chainlen(self, rev):
735 return self._chaininfo(rev)[0]
735 return self._chaininfo(rev)[0]
736
736
737 def _chaininfo(self, rev):
737 def _chaininfo(self, rev):
738 chaininfocache = self._chaininfocache
738 chaininfocache = self._chaininfocache
739 if rev in chaininfocache:
739 if rev in chaininfocache:
740 return chaininfocache[rev]
740 return chaininfocache[rev]
741 index = self.index
741 index = self.index
742 generaldelta = self._generaldelta
742 generaldelta = self._generaldelta
743 iterrev = rev
743 iterrev = rev
744 e = index[iterrev]
744 e = index[iterrev]
745 clen = 0
745 clen = 0
746 compresseddeltalen = 0
746 compresseddeltalen = 0
747 while iterrev != e[3]:
747 while iterrev != e[3]:
748 clen += 1
748 clen += 1
749 compresseddeltalen += e[1]
749 compresseddeltalen += e[1]
750 if generaldelta:
750 if generaldelta:
751 iterrev = e[3]
751 iterrev = e[3]
752 else:
752 else:
753 iterrev -= 1
753 iterrev -= 1
754 if iterrev in chaininfocache:
754 if iterrev in chaininfocache:
755 t = chaininfocache[iterrev]
755 t = chaininfocache[iterrev]
756 clen += t[0]
756 clen += t[0]
757 compresseddeltalen += t[1]
757 compresseddeltalen += t[1]
758 break
758 break
759 e = index[iterrev]
759 e = index[iterrev]
760 else:
760 else:
761 # Add text length of base since decompressing that also takes
761 # Add text length of base since decompressing that also takes
762 # work. For cache hits the length is already included.
762 # work. For cache hits the length is already included.
763 compresseddeltalen += e[1]
763 compresseddeltalen += e[1]
764 r = (clen, compresseddeltalen)
764 r = (clen, compresseddeltalen)
765 chaininfocache[rev] = r
765 chaininfocache[rev] = r
766 return r
766 return r
767
767
768 def _deltachain(self, rev, stoprev=None):
768 def _deltachain(self, rev, stoprev=None):
769 """Obtain the delta chain for a revision.
769 """Obtain the delta chain for a revision.
770
770
771 ``stoprev`` specifies a revision to stop at. If not specified, we
771 ``stoprev`` specifies a revision to stop at. If not specified, we
772 stop at the base of the chain.
772 stop at the base of the chain.
773
773
774 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
774 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
775 revs in ascending order and ``stopped`` is a bool indicating whether
775 revs in ascending order and ``stopped`` is a bool indicating whether
776 ``stoprev`` was hit.
776 ``stoprev`` was hit.
777 """
777 """
778 # Try C implementation.
778 # Try C implementation.
779 try:
779 try:
780 return self.index.deltachain(rev, stoprev, self._generaldelta)
780 return self.index.deltachain(rev, stoprev, self._generaldelta)
781 except AttributeError:
781 except AttributeError:
782 pass
782 pass
783
783
784 chain = []
784 chain = []
785
785
786 # Alias to prevent attribute lookup in tight loop.
786 # Alias to prevent attribute lookup in tight loop.
787 index = self.index
787 index = self.index
788 generaldelta = self._generaldelta
788 generaldelta = self._generaldelta
789
789
790 iterrev = rev
790 iterrev = rev
791 e = index[iterrev]
791 e = index[iterrev]
792 while iterrev != e[3] and iterrev != stoprev:
792 while iterrev != e[3] and iterrev != stoprev:
793 chain.append(iterrev)
793 chain.append(iterrev)
794 if generaldelta:
794 if generaldelta:
795 iterrev = e[3]
795 iterrev = e[3]
796 else:
796 else:
797 iterrev -= 1
797 iterrev -= 1
798 e = index[iterrev]
798 e = index[iterrev]
799
799
800 if iterrev == stoprev:
800 if iterrev == stoprev:
801 stopped = True
801 stopped = True
802 else:
802 else:
803 chain.append(iterrev)
803 chain.append(iterrev)
804 stopped = False
804 stopped = False
805
805
806 chain.reverse()
806 chain.reverse()
807 return chain, stopped
807 return chain, stopped
808
808
809 def ancestors(self, revs, stoprev=0, inclusive=False):
809 def ancestors(self, revs, stoprev=0, inclusive=False):
810 """Generate the ancestors of 'revs' in reverse revision order.
810 """Generate the ancestors of 'revs' in reverse revision order.
811 Does not generate revs lower than stoprev.
811 Does not generate revs lower than stoprev.
812
812
813 See the documentation for ancestor.lazyancestors for more details."""
813 See the documentation for ancestor.lazyancestors for more details."""
814
814
815 # first, make sure start revisions aren't filtered
815 # first, make sure start revisions aren't filtered
816 revs = list(revs)
816 revs = list(revs)
817 checkrev = self.node
817 checkrev = self.node
818 for r in revs:
818 for r in revs:
819 checkrev(r)
819 checkrev(r)
820 # and we're sure ancestors aren't filtered as well
820 # and we're sure ancestors aren't filtered as well
821
821
822 if rustext is not None:
822 if rustext is not None:
823 lazyancestors = rustext.ancestor.LazyAncestors
823 lazyancestors = rustext.ancestor.LazyAncestors
824 arg = self.index
824 arg = self.index
825 elif util.safehasattr(parsers, 'rustlazyancestors'):
825 elif util.safehasattr(parsers, 'rustlazyancestors'):
826 lazyancestors = ancestor.rustlazyancestors
826 lazyancestors = ancestor.rustlazyancestors
827 arg = self.index
827 arg = self.index
828 else:
828 else:
829 lazyancestors = ancestor.lazyancestors
829 lazyancestors = ancestor.lazyancestors
830 arg = self._uncheckedparentrevs
830 arg = self._uncheckedparentrevs
831 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
831 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
832
832
833 def descendants(self, revs):
833 def descendants(self, revs):
834 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
834 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
835
835
836 def findcommonmissing(self, common=None, heads=None):
836 def findcommonmissing(self, common=None, heads=None):
837 """Return a tuple of the ancestors of common and the ancestors of heads
837 """Return a tuple of the ancestors of common and the ancestors of heads
838 that are not ancestors of common. In revset terminology, we return the
838 that are not ancestors of common. In revset terminology, we return the
839 tuple:
839 tuple:
840
840
841 ::common, (::heads) - (::common)
841 ::common, (::heads) - (::common)
842
842
843 The list is sorted by revision number, meaning it is
843 The list is sorted by revision number, meaning it is
844 topologically sorted.
844 topologically sorted.
845
845
846 'heads' and 'common' are both lists of node IDs. If heads is
846 'heads' and 'common' are both lists of node IDs. If heads is
847 not supplied, uses all of the revlog's heads. If common is not
847 not supplied, uses all of the revlog's heads. If common is not
848 supplied, uses nullid."""
848 supplied, uses nullid."""
849 if common is None:
849 if common is None:
850 common = [nullid]
850 common = [nullid]
851 if heads is None:
851 if heads is None:
852 heads = self.heads()
852 heads = self.heads()
853
853
854 common = [self.rev(n) for n in common]
854 common = [self.rev(n) for n in common]
855 heads = [self.rev(n) for n in heads]
855 heads = [self.rev(n) for n in heads]
856
856
857 # we want the ancestors, but inclusive
857 # we want the ancestors, but inclusive
858 class lazyset(object):
858 class lazyset(object):
859 def __init__(self, lazyvalues):
859 def __init__(self, lazyvalues):
860 self.addedvalues = set()
860 self.addedvalues = set()
861 self.lazyvalues = lazyvalues
861 self.lazyvalues = lazyvalues
862
862
863 def __contains__(self, value):
863 def __contains__(self, value):
864 return value in self.addedvalues or value in self.lazyvalues
864 return value in self.addedvalues or value in self.lazyvalues
865
865
866 def __iter__(self):
866 def __iter__(self):
867 added = self.addedvalues
867 added = self.addedvalues
868 for r in added:
868 for r in added:
869 yield r
869 yield r
870 for r in self.lazyvalues:
870 for r in self.lazyvalues:
871 if not r in added:
871 if not r in added:
872 yield r
872 yield r
873
873
874 def add(self, value):
874 def add(self, value):
875 self.addedvalues.add(value)
875 self.addedvalues.add(value)
876
876
877 def update(self, values):
877 def update(self, values):
878 self.addedvalues.update(values)
878 self.addedvalues.update(values)
879
879
880 has = lazyset(self.ancestors(common))
880 has = lazyset(self.ancestors(common))
881 has.add(nullrev)
881 has.add(nullrev)
882 has.update(common)
882 has.update(common)
883
883
884 # take all ancestors from heads that aren't in has
884 # take all ancestors from heads that aren't in has
885 missing = set()
885 missing = set()
886 visit = collections.deque(r for r in heads if r not in has)
886 visit = collections.deque(r for r in heads if r not in has)
887 while visit:
887 while visit:
888 r = visit.popleft()
888 r = visit.popleft()
889 if r in missing:
889 if r in missing:
890 continue
890 continue
891 else:
891 else:
892 missing.add(r)
892 missing.add(r)
893 for p in self.parentrevs(r):
893 for p in self.parentrevs(r):
894 if p not in has:
894 if p not in has:
895 visit.append(p)
895 visit.append(p)
896 missing = list(missing)
896 missing = list(missing)
897 missing.sort()
897 missing.sort()
898 return has, [self.node(miss) for miss in missing]
898 return has, [self.node(miss) for miss in missing]
899
899
900 def incrementalmissingrevs(self, common=None):
900 def incrementalmissingrevs(self, common=None):
901 """Return an object that can be used to incrementally compute the
901 """Return an object that can be used to incrementally compute the
902 revision numbers of the ancestors of arbitrary sets that are not
902 revision numbers of the ancestors of arbitrary sets that are not
903 ancestors of common. This is an ancestor.incrementalmissingancestors
903 ancestors of common. This is an ancestor.incrementalmissingancestors
904 object.
904 object.
905
905
906 'common' is a list of revision numbers. If common is not supplied, uses
906 'common' is a list of revision numbers. If common is not supplied, uses
907 nullrev.
907 nullrev.
908 """
908 """
909 if common is None:
909 if common is None:
910 common = [nullrev]
910 common = [nullrev]
911
911
912 if rustext is not None:
912 if rustext is not None:
913 return rustext.ancestor.MissingAncestors(self.index, common)
913 return rustext.ancestor.MissingAncestors(self.index, common)
914 return ancestor.incrementalmissingancestors(self.parentrevs, common)
914 return ancestor.incrementalmissingancestors(self.parentrevs, common)
915
915
916 def findmissingrevs(self, common=None, heads=None):
916 def findmissingrevs(self, common=None, heads=None):
917 """Return the revision numbers of the ancestors of heads that
917 """Return the revision numbers of the ancestors of heads that
918 are not ancestors of common.
918 are not ancestors of common.
919
919
920 More specifically, return a list of revision numbers corresponding to
920 More specifically, return a list of revision numbers corresponding to
921 nodes N such that every N satisfies the following constraints:
921 nodes N such that every N satisfies the following constraints:
922
922
923 1. N is an ancestor of some node in 'heads'
923 1. N is an ancestor of some node in 'heads'
924 2. N is not an ancestor of any node in 'common'
924 2. N is not an ancestor of any node in 'common'
925
925
926 The list is sorted by revision number, meaning it is
926 The list is sorted by revision number, meaning it is
927 topologically sorted.
927 topologically sorted.
928
928
929 'heads' and 'common' are both lists of revision numbers. If heads is
929 'heads' and 'common' are both lists of revision numbers. If heads is
930 not supplied, uses all of the revlog's heads. If common is not
930 not supplied, uses all of the revlog's heads. If common is not
931 supplied, uses nullid."""
931 supplied, uses nullid."""
932 if common is None:
932 if common is None:
933 common = [nullrev]
933 common = [nullrev]
934 if heads is None:
934 if heads is None:
935 heads = self.headrevs()
935 heads = self.headrevs()
936
936
937 inc = self.incrementalmissingrevs(common=common)
937 inc = self.incrementalmissingrevs(common=common)
938 return inc.missingancestors(heads)
938 return inc.missingancestors(heads)
939
939
940 def findmissing(self, common=None, heads=None):
940 def findmissing(self, common=None, heads=None):
941 """Return the ancestors of heads that are not ancestors of common.
941 """Return the ancestors of heads that are not ancestors of common.
942
942
943 More specifically, return a list of nodes N such that every N
943 More specifically, return a list of nodes N such that every N
944 satisfies the following constraints:
944 satisfies the following constraints:
945
945
946 1. N is an ancestor of some node in 'heads'
946 1. N is an ancestor of some node in 'heads'
947 2. N is not an ancestor of any node in 'common'
947 2. N is not an ancestor of any node in 'common'
948
948
949 The list is sorted by revision number, meaning it is
949 The list is sorted by revision number, meaning it is
950 topologically sorted.
950 topologically sorted.
951
951
952 'heads' and 'common' are both lists of node IDs. If heads is
952 'heads' and 'common' are both lists of node IDs. If heads is
953 not supplied, uses all of the revlog's heads. If common is not
953 not supplied, uses all of the revlog's heads. If common is not
954 supplied, uses nullid."""
954 supplied, uses nullid."""
955 if common is None:
955 if common is None:
956 common = [nullid]
956 common = [nullid]
957 if heads is None:
957 if heads is None:
958 heads = self.heads()
958 heads = self.heads()
959
959
960 common = [self.rev(n) for n in common]
960 common = [self.rev(n) for n in common]
961 heads = [self.rev(n) for n in heads]
961 heads = [self.rev(n) for n in heads]
962
962
963 inc = self.incrementalmissingrevs(common=common)
963 inc = self.incrementalmissingrevs(common=common)
964 return [self.node(r) for r in inc.missingancestors(heads)]
964 return [self.node(r) for r in inc.missingancestors(heads)]
965
965
966 def nodesbetween(self, roots=None, heads=None):
966 def nodesbetween(self, roots=None, heads=None):
967 """Return a topological path from 'roots' to 'heads'.
967 """Return a topological path from 'roots' to 'heads'.
968
968
969 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
969 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
970 topologically sorted list of all nodes N that satisfy both of
970 topologically sorted list of all nodes N that satisfy both of
971 these constraints:
971 these constraints:
972
972
973 1. N is a descendant of some node in 'roots'
973 1. N is a descendant of some node in 'roots'
974 2. N is an ancestor of some node in 'heads'
974 2. N is an ancestor of some node in 'heads'
975
975
976 Every node is considered to be both a descendant and an ancestor
976 Every node is considered to be both a descendant and an ancestor
977 of itself, so every reachable node in 'roots' and 'heads' will be
977 of itself, so every reachable node in 'roots' and 'heads' will be
978 included in 'nodes'.
978 included in 'nodes'.
979
979
980 'outroots' is the list of reachable nodes in 'roots', i.e., the
980 'outroots' is the list of reachable nodes in 'roots', i.e., the
981 subset of 'roots' that is returned in 'nodes'. Likewise,
981 subset of 'roots' that is returned in 'nodes'. Likewise,
982 'outheads' is the subset of 'heads' that is also in 'nodes'.
982 'outheads' is the subset of 'heads' that is also in 'nodes'.
983
983
984 'roots' and 'heads' are both lists of node IDs. If 'roots' is
984 'roots' and 'heads' are both lists of node IDs. If 'roots' is
985 unspecified, uses nullid as the only root. If 'heads' is
985 unspecified, uses nullid as the only root. If 'heads' is
986 unspecified, uses list of all of the revlog's heads."""
986 unspecified, uses list of all of the revlog's heads."""
987 nonodes = ([], [], [])
987 nonodes = ([], [], [])
988 if roots is not None:
988 if roots is not None:
989 roots = list(roots)
989 roots = list(roots)
990 if not roots:
990 if not roots:
991 return nonodes
991 return nonodes
992 lowestrev = min([self.rev(n) for n in roots])
992 lowestrev = min([self.rev(n) for n in roots])
993 else:
993 else:
994 roots = [nullid] # Everybody's a descendant of nullid
994 roots = [nullid] # Everybody's a descendant of nullid
995 lowestrev = nullrev
995 lowestrev = nullrev
996 if (lowestrev == nullrev) and (heads is None):
996 if (lowestrev == nullrev) and (heads is None):
997 # We want _all_ the nodes!
997 # We want _all_ the nodes!
998 return ([self.node(r) for r in self], [nullid], list(self.heads()))
998 return ([self.node(r) for r in self], [nullid], list(self.heads()))
999 if heads is None:
999 if heads is None:
1000 # All nodes are ancestors, so the latest ancestor is the last
1000 # All nodes are ancestors, so the latest ancestor is the last
1001 # node.
1001 # node.
1002 highestrev = len(self) - 1
1002 highestrev = len(self) - 1
1003 # Set ancestors to None to signal that every node is an ancestor.
1003 # Set ancestors to None to signal that every node is an ancestor.
1004 ancestors = None
1004 ancestors = None
1005 # Set heads to an empty dictionary for later discovery of heads
1005 # Set heads to an empty dictionary for later discovery of heads
1006 heads = {}
1006 heads = {}
1007 else:
1007 else:
1008 heads = list(heads)
1008 heads = list(heads)
1009 if not heads:
1009 if not heads:
1010 return nonodes
1010 return nonodes
1011 ancestors = set()
1011 ancestors = set()
1012 # Turn heads into a dictionary so we can remove 'fake' heads.
1012 # Turn heads into a dictionary so we can remove 'fake' heads.
1013 # Also, later we will be using it to filter out the heads we can't
1013 # Also, later we will be using it to filter out the heads we can't
1014 # find from roots.
1014 # find from roots.
1015 heads = dict.fromkeys(heads, False)
1015 heads = dict.fromkeys(heads, False)
1016 # Start at the top and keep marking parents until we're done.
1016 # Start at the top and keep marking parents until we're done.
1017 nodestotag = set(heads)
1017 nodestotag = set(heads)
1018 # Remember where the top was so we can use it as a limit later.
1018 # Remember where the top was so we can use it as a limit later.
1019 highestrev = max([self.rev(n) for n in nodestotag])
1019 highestrev = max([self.rev(n) for n in nodestotag])
1020 while nodestotag:
1020 while nodestotag:
1021 # grab a node to tag
1021 # grab a node to tag
1022 n = nodestotag.pop()
1022 n = nodestotag.pop()
1023 # Never tag nullid
1023 # Never tag nullid
1024 if n == nullid:
1024 if n == nullid:
1025 continue
1025 continue
1026 # A node's revision number represents its place in a
1026 # A node's revision number represents its place in a
1027 # topologically sorted list of nodes.
1027 # topologically sorted list of nodes.
1028 r = self.rev(n)
1028 r = self.rev(n)
1029 if r >= lowestrev:
1029 if r >= lowestrev:
1030 if n not in ancestors:
1030 if n not in ancestors:
1031 # If we are possibly a descendant of one of the roots
1031 # If we are possibly a descendant of one of the roots
1032 # and we haven't already been marked as an ancestor
1032 # and we haven't already been marked as an ancestor
1033 ancestors.add(n) # Mark as ancestor
1033 ancestors.add(n) # Mark as ancestor
1034 # Add non-nullid parents to list of nodes to tag.
1034 # Add non-nullid parents to list of nodes to tag.
1035 nodestotag.update([p for p in self.parents(n) if
1035 nodestotag.update([p for p in self.parents(n) if
1036 p != nullid])
1036 p != nullid])
1037 elif n in heads: # We've seen it before, is it a fake head?
1037 elif n in heads: # We've seen it before, is it a fake head?
1038 # So it is, real heads should not be the ancestors of
1038 # So it is, real heads should not be the ancestors of
1039 # any other heads.
1039 # any other heads.
1040 heads.pop(n)
1040 heads.pop(n)
1041 if not ancestors:
1041 if not ancestors:
1042 return nonodes
1042 return nonodes
1043 # Now that we have our set of ancestors, we want to remove any
1043 # Now that we have our set of ancestors, we want to remove any
1044 # roots that are not ancestors.
1044 # roots that are not ancestors.
1045
1045
1046 # If one of the roots was nullid, everything is included anyway.
1046 # If one of the roots was nullid, everything is included anyway.
1047 if lowestrev > nullrev:
1047 if lowestrev > nullrev:
1048 # But, since we weren't, let's recompute the lowest rev to not
1048 # But, since we weren't, let's recompute the lowest rev to not
1049 # include roots that aren't ancestors.
1049 # include roots that aren't ancestors.
1050
1050
1051 # Filter out roots that aren't ancestors of heads
1051 # Filter out roots that aren't ancestors of heads
1052 roots = [root for root in roots if root in ancestors]
1052 roots = [root for root in roots if root in ancestors]
1053 # Recompute the lowest revision
1053 # Recompute the lowest revision
1054 if roots:
1054 if roots:
1055 lowestrev = min([self.rev(root) for root in roots])
1055 lowestrev = min([self.rev(root) for root in roots])
1056 else:
1056 else:
1057 # No more roots? Return empty list
1057 # No more roots? Return empty list
1058 return nonodes
1058 return nonodes
1059 else:
1059 else:
1060 # We are descending from nullid, and don't need to care about
1060 # We are descending from nullid, and don't need to care about
1061 # any other roots.
1061 # any other roots.
1062 lowestrev = nullrev
1062 lowestrev = nullrev
1063 roots = [nullid]
1063 roots = [nullid]
1064 # Transform our roots list into a set.
1064 # Transform our roots list into a set.
1065 descendants = set(roots)
1065 descendants = set(roots)
1066 # Also, keep the original roots so we can filter out roots that aren't
1066 # Also, keep the original roots so we can filter out roots that aren't
1067 # 'real' roots (i.e. are descended from other roots).
1067 # 'real' roots (i.e. are descended from other roots).
1068 roots = descendants.copy()
1068 roots = descendants.copy()
1069 # Our topologically sorted list of output nodes.
1069 # Our topologically sorted list of output nodes.
1070 orderedout = []
1070 orderedout = []
1071 # Don't start at nullid since we don't want nullid in our output list,
1071 # Don't start at nullid since we don't want nullid in our output list,
1072 # and if nullid shows up in descendants, empty parents will look like
1072 # and if nullid shows up in descendants, empty parents will look like
1073 # they're descendants.
1073 # they're descendants.
1074 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1074 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1075 n = self.node(r)
1075 n = self.node(r)
1076 isdescendant = False
1076 isdescendant = False
1077 if lowestrev == nullrev: # Everybody is a descendant of nullid
1077 if lowestrev == nullrev: # Everybody is a descendant of nullid
1078 isdescendant = True
1078 isdescendant = True
1079 elif n in descendants:
1079 elif n in descendants:
1080 # n is already a descendant
1080 # n is already a descendant
1081 isdescendant = True
1081 isdescendant = True
1082 # This check only needs to be done here because all the roots
1082 # This check only needs to be done here because all the roots
1083 # will start being marked is descendants before the loop.
1083 # will start being marked is descendants before the loop.
1084 if n in roots:
1084 if n in roots:
1085 # If n was a root, check if it's a 'real' root.
1085 # If n was a root, check if it's a 'real' root.
1086 p = tuple(self.parents(n))
1086 p = tuple(self.parents(n))
1087 # If any of its parents are descendants, it's not a root.
1087 # If any of its parents are descendants, it's not a root.
1088 if (p[0] in descendants) or (p[1] in descendants):
1088 if (p[0] in descendants) or (p[1] in descendants):
1089 roots.remove(n)
1089 roots.remove(n)
1090 else:
1090 else:
1091 p = tuple(self.parents(n))
1091 p = tuple(self.parents(n))
1092 # A node is a descendant if either of its parents are
1092 # A node is a descendant if either of its parents are
1093 # descendants. (We seeded the dependents list with the roots
1093 # descendants. (We seeded the dependents list with the roots
1094 # up there, remember?)
1094 # up there, remember?)
1095 if (p[0] in descendants) or (p[1] in descendants):
1095 if (p[0] in descendants) or (p[1] in descendants):
1096 descendants.add(n)
1096 descendants.add(n)
1097 isdescendant = True
1097 isdescendant = True
1098 if isdescendant and ((ancestors is None) or (n in ancestors)):
1098 if isdescendant and ((ancestors is None) or (n in ancestors)):
1099 # Only include nodes that are both descendants and ancestors.
1099 # Only include nodes that are both descendants and ancestors.
1100 orderedout.append(n)
1100 orderedout.append(n)
1101 if (ancestors is not None) and (n in heads):
1101 if (ancestors is not None) and (n in heads):
1102 # We're trying to figure out which heads are reachable
1102 # We're trying to figure out which heads are reachable
1103 # from roots.
1103 # from roots.
1104 # Mark this head as having been reached
1104 # Mark this head as having been reached
1105 heads[n] = True
1105 heads[n] = True
1106 elif ancestors is None:
1106 elif ancestors is None:
1107 # Otherwise, we're trying to discover the heads.
1107 # Otherwise, we're trying to discover the heads.
1108 # Assume this is a head because if it isn't, the next step
1108 # Assume this is a head because if it isn't, the next step
1109 # will eventually remove it.
1109 # will eventually remove it.
1110 heads[n] = True
1110 heads[n] = True
1111 # But, obviously its parents aren't.
1111 # But, obviously its parents aren't.
1112 for p in self.parents(n):
1112 for p in self.parents(n):
1113 heads.pop(p, None)
1113 heads.pop(p, None)
1114 heads = [head for head, flag in heads.iteritems() if flag]
1114 heads = [head for head, flag in heads.iteritems() if flag]
1115 roots = list(roots)
1115 roots = list(roots)
1116 assert orderedout
1116 assert orderedout
1117 assert roots
1117 assert roots
1118 assert heads
1118 assert heads
1119 return (orderedout, roots, heads)
1119 return (orderedout, roots, heads)
1120
1120
1121 def headrevs(self, revs=None):
1121 def headrevs(self, revs=None):
1122 if revs is None:
1122 if revs is None:
1123 try:
1123 try:
1124 return self.index.headrevs()
1124 return self.index.headrevs()
1125 except AttributeError:
1125 except AttributeError:
1126 return self._headrevs()
1126 return self._headrevs()
1127 if rustext is not None:
1127 if rustext is not None:
1128 return rustext.dagop.headrevs(self.index, revs)
1128 return rustext.dagop.headrevs(self.index, revs)
1129 return dagop.headrevs(revs, self._uncheckedparentrevs)
1129 return dagop.headrevs(revs, self._uncheckedparentrevs)
1130
1130
    def computephases(self, roots):
        """Compute phase information from the given phase ``roots``.

        Delegates entirely to the index's ``computephasesmapsets``.
        NOTE(review): assumes the native (C) index is in use — there is no
        pure-Python fallback here; confirm callers guard for that.
        """
        return self.index.computephasesmapsets(roots)
1133
1133
1134 def _headrevs(self):
1134 def _headrevs(self):
1135 count = len(self)
1135 count = len(self)
1136 if not count:
1136 if not count:
1137 return [nullrev]
1137 return [nullrev]
1138 # we won't iter over filtered rev so nobody is a head at start
1138 # we won't iter over filtered rev so nobody is a head at start
1139 ishead = [0] * (count + 1)
1139 ishead = [0] * (count + 1)
1140 index = self.index
1140 index = self.index
1141 for r in self:
1141 for r in self:
1142 ishead[r] = 1 # I may be an head
1142 ishead[r] = 1 # I may be an head
1143 e = index[r]
1143 e = index[r]
1144 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1144 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1145 return [r for r, val in enumerate(ishead) if val]
1145 return [r for r, val in enumerate(ishead) if val]
1146
1146
1147 def heads(self, start=None, stop=None):
1147 def heads(self, start=None, stop=None):
1148 """return the list of all nodes that have no children
1148 """return the list of all nodes that have no children
1149
1149
1150 if start is specified, only heads that are descendants of
1150 if start is specified, only heads that are descendants of
1151 start will be returned
1151 start will be returned
1152 if stop is specified, it will consider all the revs from stop
1152 if stop is specified, it will consider all the revs from stop
1153 as if they had no children
1153 as if they had no children
1154 """
1154 """
1155 if start is None and stop is None:
1155 if start is None and stop is None:
1156 if not len(self):
1156 if not len(self):
1157 return [nullid]
1157 return [nullid]
1158 return [self.node(r) for r in self.headrevs()]
1158 return [self.node(r) for r in self.headrevs()]
1159
1159
1160 if start is None:
1160 if start is None:
1161 start = nullrev
1161 start = nullrev
1162 else:
1162 else:
1163 start = self.rev(start)
1163 start = self.rev(start)
1164
1164
1165 stoprevs = set(self.rev(n) for n in stop or [])
1165 stoprevs = set(self.rev(n) for n in stop or [])
1166
1166
1167 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1167 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1168 stoprevs=stoprevs)
1168 stoprevs=stoprevs)
1169
1169
1170 return [self.node(rev) for rev in revs]
1170 return [self.node(rev) for rev in revs]
1171
1171
1172 def children(self, node):
1172 def children(self, node):
1173 """find the children of a given node"""
1173 """find the children of a given node"""
1174 c = []
1174 c = []
1175 p = self.rev(node)
1175 p = self.rev(node)
1176 for r in self.revs(start=p + 1):
1176 for r in self.revs(start=p + 1):
1177 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1177 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1178 if prevs:
1178 if prevs:
1179 for pr in prevs:
1179 for pr in prevs:
1180 if pr == p:
1180 if pr == p:
1181 c.append(self.node(r))
1181 c.append(self.node(r))
1182 elif p == nullrev:
1182 elif p == nullrev:
1183 c.append(self.node(r))
1183 c.append(self.node(r))
1184 return c
1184 return c
1185
1185
1186 def commonancestorsheads(self, a, b):
1186 def commonancestorsheads(self, a, b):
1187 """calculate all the heads of the common ancestors of nodes a and b"""
1187 """calculate all the heads of the common ancestors of nodes a and b"""
1188 a, b = self.rev(a), self.rev(b)
1188 a, b = self.rev(a), self.rev(b)
1189 ancs = self._commonancestorsheads(a, b)
1189 ancs = self._commonancestorsheads(a, b)
1190 return pycompat.maplist(self.node, ancs)
1190 return pycompat.maplist(self.node, ancs)
1191
1191
1192 def _commonancestorsheads(self, *revs):
1192 def _commonancestorsheads(self, *revs):
1193 """calculate all the heads of the common ancestors of revs"""
1193 """calculate all the heads of the common ancestors of revs"""
1194 try:
1194 try:
1195 ancs = self.index.commonancestorsheads(*revs)
1195 ancs = self.index.commonancestorsheads(*revs)
1196 except (AttributeError, OverflowError): # C implementation failed
1196 except (AttributeError, OverflowError): # C implementation failed
1197 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1197 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1198 return ancs
1198 return ancs
1199
1199
1200 def isancestor(self, a, b):
1200 def isancestor(self, a, b):
1201 """return True if node a is an ancestor of node b
1201 """return True if node a is an ancestor of node b
1202
1202
1203 A revision is considered an ancestor of itself."""
1203 A revision is considered an ancestor of itself."""
1204 a, b = self.rev(a), self.rev(b)
1204 a, b = self.rev(a), self.rev(b)
1205 return self.isancestorrev(a, b)
1205 return self.isancestorrev(a, b)
1206
1206
1207 def isancestorrev(self, a, b):
1207 def isancestorrev(self, a, b):
1208 """return True if revision a is an ancestor of revision b
1208 """return True if revision a is an ancestor of revision b
1209
1209
1210 A revision is considered an ancestor of itself.
1210 A revision is considered an ancestor of itself.
1211
1211
1212 The implementation of this is trivial but the use of
1212 The implementation of this is trivial but the use of
1213 commonancestorsheads is not."""
1213 commonancestorsheads is not."""
1214 if a == nullrev:
1214 if a == nullrev:
1215 return True
1215 return True
1216 elif a == b:
1216 elif a == b:
1217 return True
1217 return True
1218 elif a > b:
1218 elif a > b:
1219 return False
1219 return False
1220 return a in self._commonancestorsheads(a, b)
1220 return a in self._commonancestorsheads(a, b)
1221
1221
1222 def ancestor(self, a, b):
1222 def ancestor(self, a, b):
1223 """calculate the "best" common ancestor of nodes a and b"""
1223 """calculate the "best" common ancestor of nodes a and b"""
1224
1224
1225 a, b = self.rev(a), self.rev(b)
1225 a, b = self.rev(a), self.rev(b)
1226 try:
1226 try:
1227 ancs = self.index.ancestors(a, b)
1227 ancs = self.index.ancestors(a, b)
1228 except (AttributeError, OverflowError):
1228 except (AttributeError, OverflowError):
1229 ancs = ancestor.ancestors(self.parentrevs, a, b)
1229 ancs = ancestor.ancestors(self.parentrevs, a, b)
1230 if ancs:
1230 if ancs:
1231 # choose a consistent winner when there's a tie
1231 # choose a consistent winner when there's a tie
1232 return min(map(self.node, ancs))
1232 return min(map(self.node, ancs))
1233 return nullid
1233 return nullid
1234
1234
    def _match(self, id):
        """Try to resolve ``id`` to a binary node without prefix matching.

        ``id`` may be an integer revision, a 20-byte binary node, the
        string form of a revision number, or a 40-character hex node.
        Returns None when no interpretation matches; callers then fall
        back to partial (prefix) lookup.
        """
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except error.LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            # reject strings such as '05' that int() accepts but that are
            # not the canonical textual form of a revision number
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                # negative revisions count backward from the end
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass
1268
1268
    def _partialmatch(self, id):
        """Resolve the hex node prefix ``id`` to a binary node.

        Returns the unique matching node, or None when nothing matches.
        Raises AmbiguousPrefixLookupError when several nodes share the
        prefix, and WdirUnsupported when the prefix can only denote the
        working-directory pseudo-node.
        """
        # we don't care wdirfilenodeids as they should be always full hash
        maybewdir = wdirhex.startswith(id)
        try:
            # fast path: radix-tree lookup in the native index
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        # cached result of a previous slow-path lookup?
        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                prefix = bin(id[:l * 2])
                # scan every index entry (slot 7 is the node) for nodes
                # starting with the binary prefix, then narrow by the full
                # hex prefix and by visibility (hasnode)
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                # bin() rejected the prefix: non-hex characters in id
                pass
1318
1318
1319 def lookup(self, id):
1319 def lookup(self, id):
1320 """locate a node based on:
1320 """locate a node based on:
1321 - revision number or str(revision number)
1321 - revision number or str(revision number)
1322 - nodeid or subset of hex nodeid
1322 - nodeid or subset of hex nodeid
1323 """
1323 """
1324 n = self._match(id)
1324 n = self._match(id)
1325 if n is not None:
1325 if n is not None:
1326 return n
1326 return n
1327 n = self._partialmatch(id)
1327 n = self._partialmatch(id)
1328 if n:
1328 if n:
1329 return n
1329 return n
1330
1330
1331 raise error.LookupError(id, self.indexfile, _('no match found'))
1331 raise error.LookupError(id, self.indexfile, _('no match found'))
1332
1332
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            # A prefix is usable when it resolves to exactly one node, or
            # unambiguously denotes the working-directory pseudo-node.
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            # True when the prefix could still denote wdir ('fff...').
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            # Fast path (no hidden revisions): ask the native index for the
            # shortest length, then only disambiguate against wdir.
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                # the index does not know this node
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            # wdir itself: the first prefix that resolves is the answer.
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # Slow path: grow the prefix until it resolves uniquely, then make
        # sure it cannot be confused with wdir.
        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
1380
1380
1381 def cmp(self, node, text):
1381 def cmp(self, node, text):
1382 """compare text with a given file revision
1382 """compare text with a given file revision
1383
1383
1384 returns True if text is different than what is stored.
1384 returns True if text is different than what is stored.
1385 """
1385 """
1386 p1, p2 = self.parents(node)
1386 p1, p2 = self.parents(node)
1387 return storageutil.hashrevisionsha1(text, p1, p2) != node
1387 return storageutil.hashrevisionsha1(text, p1, p2) != node
1388
1388
1389 def _cachesegment(self, offset, data):
1389 def _cachesegment(self, offset, data):
1390 """Add a segment to the revlog cache.
1390 """Add a segment to the revlog cache.
1391
1391
1392 Accepts an absolute offset and the data that is at that location.
1392 Accepts an absolute offset and the data that is at that location.
1393 """
1393 """
1394 o, d = self._chunkcache
1394 o, d = self._chunkcache
1395 # try to add to existing cache
1395 # try to add to existing cache
1396 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1396 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1397 self._chunkcache = o, d + data
1397 self._chunkcache = o, d + data
1398 else:
1398 else:
1399 self._chunkcache = offset, data
1399 self._chunkcache = offset, data
1400
1400
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        # cachesize is assumed to be a power of two, so these masks round
        # the window start down and its end up to cache-size boundaries.
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        # remember the whole window for future nearby reads
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            # The window is larger than the request: slice out the piece
            # actually asked for, after checking that enough was read.
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            # zero-copy view of the requested slice
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d
1445
1445
1446 def _getsegment(self, offset, length, df=None):
1446 def _getsegment(self, offset, length, df=None):
1447 """Obtain a segment of raw data from the revlog.
1447 """Obtain a segment of raw data from the revlog.
1448
1448
1449 Accepts an absolute offset, length of bytes to obtain, and an
1449 Accepts an absolute offset, length of bytes to obtain, and an
1450 optional file handle to the already-opened revlog. If the file
1450 optional file handle to the already-opened revlog. If the file
1451 handle is used, it's original seek position will not be preserved.
1451 handle is used, it's original seek position will not be preserved.
1452
1452
1453 Requests for data may be returned from a cache.
1453 Requests for data may be returned from a cache.
1454
1454
1455 Returns a str or a buffer instance of raw byte data.
1455 Returns a str or a buffer instance of raw byte data.
1456 """
1456 """
1457 o, d = self._chunkcache
1457 o, d = self._chunkcache
1458 l = len(d)
1458 l = len(d)
1459
1459
1460 # is it in the cache?
1460 # is it in the cache?
1461 cachestart = offset - o
1461 cachestart = offset - o
1462 cacheend = cachestart + length
1462 cacheend = cachestart + length
1463 if cachestart >= 0 and cacheend <= l:
1463 if cachestart >= 0 and cacheend <= l:
1464 if cachestart == 0 and cacheend == l:
1464 if cachestart == 0 and cacheend == l:
1465 return d # avoid a copy
1465 return d # avoid a copy
1466 return util.buffer(d, cachestart, cacheend - cachestart)
1466 return util.buffer(d, cachestart, cacheend - cachestart)
1467
1467
1468 return self._readsegment(offset, length, df=df)
1468 return self._readsegment(offset, length, df=df)
1469
1469
1470 def _getsegmentforrevs(self, startrev, endrev, df=None):
1470 def _getsegmentforrevs(self, startrev, endrev, df=None):
1471 """Obtain a segment of raw data corresponding to a range of revisions.
1471 """Obtain a segment of raw data corresponding to a range of revisions.
1472
1472
1473 Accepts the start and end revisions and an optional already-open
1473 Accepts the start and end revisions and an optional already-open
1474 file handle to be used for reading. If the file handle is read, its
1474 file handle to be used for reading. If the file handle is read, its
1475 seek position will not be preserved.
1475 seek position will not be preserved.
1476
1476
1477 Requests for data may be satisfied by a cache.
1477 Requests for data may be satisfied by a cache.
1478
1478
1479 Returns a 2-tuple of (offset, data) for the requested range of
1479 Returns a 2-tuple of (offset, data) for the requested range of
1480 revisions. Offset is the integer offset from the beginning of the
1480 revisions. Offset is the integer offset from the beginning of the
1481 revlog and data is a str or buffer of the raw byte data.
1481 revlog and data is a str or buffer of the raw byte data.
1482
1482
1483 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1483 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1484 to determine where each revision's data begins and ends.
1484 to determine where each revision's data begins and ends.
1485 """
1485 """
1486 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1486 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1487 # (functions are expensive).
1487 # (functions are expensive).
1488 index = self.index
1488 index = self.index
1489 istart = index[startrev]
1489 istart = index[startrev]
1490 start = int(istart[0] >> 16)
1490 start = int(istart[0] >> 16)
1491 if startrev == endrev:
1491 if startrev == endrev:
1492 end = start + istart[1]
1492 end = start + istart[1]
1493 else:
1493 else:
1494 iend = index[endrev]
1494 iend = index[endrev]
1495 end = int(iend[0] >> 16) + iend[1]
1495 end = int(iend[0] >> 16) + iend[1]
1496
1496
1497 if self._inline:
1497 if self._inline:
1498 start += (startrev + 1) * self._io.size
1498 start += (startrev + 1) * self._io.size
1499 end += (endrev + 1) * self._io.size
1499 end += (endrev + 1) * self._io.size
1500 length = end - start
1500 length = end - start
1501
1501
1502 return start, self._getsegment(start, length, df=df)
1502 return start, self._getsegment(start, length, df=df)
1503
1503
1504 def _chunk(self, rev, df=None):
1504 def _chunk(self, rev, df=None):
1505 """Obtain a single decompressed chunk for a revision.
1505 """Obtain a single decompressed chunk for a revision.
1506
1506
1507 Accepts an integer revision and an optional already-open file handle
1507 Accepts an integer revision and an optional already-open file handle
1508 to be used for reading. If used, the seek position of the file will not
1508 to be used for reading. If used, the seek position of the file will not
1509 be preserved.
1509 be preserved.
1510
1510
1511 Returns a str holding uncompressed data for the requested revision.
1511 Returns a str holding uncompressed data for the requested revision.
1512 """
1512 """
1513 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1513 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1514
1514
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        # Hoist bound methods and attributes into locals: these are accessed
        # once per revision in the hot loop below.
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        # With sparse-read enabled the revision run is split into smaller
        # slices so we do not read huge gaps of unrelated data from disk.
        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    # inline revlogs interleave an index entry before each
                    # data chunk; account for those entries in the offset
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                # zero-copy slice of the fetched segment for this revision
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l
1568
1568
1569 def _chunkclear(self):
1569 def _chunkclear(self):
1570 """Clear the raw chunk cache."""
1570 """Clear the raw chunk cache."""
1571 self._chunkcache = (0, '')
1571 self._chunkcache = (0, '')
1572
1572
1573 def deltaparent(self, rev):
1573 def deltaparent(self, rev):
1574 """return deltaparent of the given revision"""
1574 """return deltaparent of the given revision"""
1575 base = self.index[rev][3]
1575 base = self.index[rev][3]
1576 if base == rev:
1576 if base == rev:
1577 return nullrev
1577 return nullrev
1578 elif self._generaldelta:
1578 elif self._generaldelta:
1579 return base
1579 return base
1580 else:
1580 else:
1581 return rev - 1
1581 return rev - 1
1582
1582
    def issnapshot(self, rev):
        """tells whether rev is a snapshot
        """
        if not self._sparserevlog:
            # without sparse-revlog, only full-text revisions (delta against
            # null) count as snapshots
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            # NOTE: this rebinds ``self.issnapshot`` on the instance, so the
            # hasattr check above only runs once per revlog object
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]  # delta base recorded in the index
        if base == rev:
            # self-based entry: stored as a full snapshot
            return True
        if base == nullrev:
            # delta against null is a level-0 snapshot
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            # delta against one of its parents: a regular delta, not a
            # snapshot
            return False
        # delta against a non-parent: snapshot iff its base is itself a
        # snapshot (intermediate snapshot chain)
        return self.issnapshot(base)
1605
1605
1606 def snapshotdepth(self, rev):
1606 def snapshotdepth(self, rev):
1607 """number of snapshot in the chain before this one"""
1607 """number of snapshot in the chain before this one"""
1608 if not self.issnapshot(rev):
1608 if not self.issnapshot(rev):
1609 raise error.ProgrammingError('revision %d not a snapshot')
1609 raise error.ProgrammingError('revision %d not a snapshot')
1610 return len(self._deltachain(rev)[0]) - 1
1610 return len(self._deltachain(rev)[0]) - 1
1611
1611
1612 def revdiff(self, rev1, rev2):
1612 def revdiff(self, rev1, rev2):
1613 """return or calculate a delta between two revisions
1613 """return or calculate a delta between two revisions
1614
1614
1615 The delta calculated is in binary form and is intended to be written to
1615 The delta calculated is in binary form and is intended to be written to
1616 revlog data directly. So this function needs raw revision data.
1616 revlog data directly. So this function needs raw revision data.
1617 """
1617 """
1618 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1618 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1619 return bytes(self._chunk(rev2))
1619 return bytes(self._chunk(rev2))
1620
1620
1621 return mdiff.textdiff(self.revision(rev1, raw=True),
1621 return mdiff.textdiff(self.revision(rev1, raw=True),
1622 self.revision(rev2, raw=True))
1622 self.revision(rev2, raw=True))
1623
1623
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        # accept either a revision number or a node id
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            # even on a cache miss, the cached revision can terminate the
            # delta chain walk early (see stoprev below)
            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                # chain walk stopped at the cached revision: use its rawtext
                # as the base to apply the remaining deltas onto
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            # cap how much raw segment data _chunks may read at once;
            # 4x the expected rawsize is the heuristic budget
            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                # no cached base: the first chunk of the chain is the base
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text
1698
1698
1699 def hash(self, text, p1, p2):
1699 def hash(self, text, p1, p2):
1700 """Compute a node hash.
1700 """Compute a node hash.
1701
1701
1702 Available as a function so that subclasses can replace the hash
1702 Available as a function so that subclasses can replace the hash
1703 as needed.
1703 as needed.
1704 """
1704 """
1705 return storageutil.hashrevisionsha1(text, p1, p2)
1705 return storageutil.hashrevisionsha1(text, p1, p2)
1706
1706
1707 def _processflags(self, text, flags, operation, raw=False):
1707 def _processflags(self, text, flags, operation, raw=False):
1708 """Inspect revision data flags and applies transforms defined by
1708 """Inspect revision data flags and applies transforms defined by
1709 registered flag processors.
1709 registered flag processors.
1710
1710
1711 ``text`` - the revision data to process
1711 ``text`` - the revision data to process
1712 ``flags`` - the revision flags
1712 ``flags`` - the revision flags
1713 ``operation`` - the operation being performed (read or write)
1713 ``operation`` - the operation being performed (read or write)
1714 ``raw`` - an optional argument describing if the raw transform should be
1714 ``raw`` - an optional argument describing if the raw transform should be
1715 applied.
1715 applied.
1716
1716
1717 This method processes the flags in the order (or reverse order if
1717 This method processes the flags in the order (or reverse order if
1718 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1718 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1719 flag processors registered for present flags. The order of flags defined
1719 flag processors registered for present flags. The order of flags defined
1720 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1720 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1721
1721
1722 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1722 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1723 processed text and ``validatehash`` is a bool indicating whether the
1723 processed text and ``validatehash`` is a bool indicating whether the
1724 returned text should be checked for hash integrity.
1724 returned text should be checked for hash integrity.
1725
1725
1726 Note: If the ``raw`` argument is set, it has precedence over the
1726 Note: If the ``raw`` argument is set, it has precedence over the
1727 operation and will only update the value of ``validatehash``.
1727 operation and will only update the value of ``validatehash``.
1728 """
1728 """
1729 # fast path: no flag processors will run
1729 # fast path: no flag processors will run
1730 if flags == 0:
1730 if flags == 0:
1731 return text, True
1731 return text, True
1732 if not operation in ('read', 'write'):
1732 if not operation in ('read', 'write'):
1733 raise error.ProgrammingError(_("invalid '%s' operation") %
1733 raise error.ProgrammingError(_("invalid '%s' operation") %
1734 operation)
1734 operation)
1735 # Check all flags are known.
1735 # Check all flags are known.
1736 if flags & ~REVIDX_KNOWN_FLAGS:
1736 if flags & ~REVIDX_KNOWN_FLAGS:
1737 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1737 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1738 (flags & ~REVIDX_KNOWN_FLAGS))
1738 (flags & ~REVIDX_KNOWN_FLAGS))
1739 validatehash = True
1739 validatehash = True
1740 # Depending on the operation (read or write), the order might be
1740 # Depending on the operation (read or write), the order might be
1741 # reversed due to non-commutative transforms.
1741 # reversed due to non-commutative transforms.
1742 orderedflags = REVIDX_FLAGS_ORDER
1742 orderedflags = REVIDX_FLAGS_ORDER
1743 if operation == 'write':
1743 if operation == 'write':
1744 orderedflags = reversed(orderedflags)
1744 orderedflags = reversed(orderedflags)
1745
1745
1746 for flag in orderedflags:
1746 for flag in orderedflags:
1747 # If a flagprocessor has been registered for a known flag, apply the
1747 # If a flagprocessor has been registered for a known flag, apply the
1748 # related operation transform and update result tuple.
1748 # related operation transform and update result tuple.
1749 if flag & flags:
1749 if flag & flags:
1750 vhash = True
1750 vhash = True
1751
1751
1752 if flag not in self._flagprocessors:
1752 if flag not in self._flagprocessors:
1753 message = _("missing processor for flag '%#x'") % (flag)
1753 message = _("missing processor for flag '%#x'") % (flag)
1754 raise error.RevlogError(message)
1754 raise error.RevlogError(message)
1755
1755
1756 processor = self._flagprocessors[flag]
1756 processor = self._flagprocessors[flag]
1757 if processor is not None:
1757 if processor is not None:
1758 readtransform, writetransform, rawtransform = processor
1758 readtransform, writetransform, rawtransform = processor
1759
1759
1760 if raw:
1760 if raw:
1761 vhash = rawtransform(self, text)
1761 vhash = rawtransform(self, text)
1762 elif operation == 'read':
1762 elif operation == 'read':
1763 text, vhash = readtransform(self, text)
1763 text, vhash = readtransform(self, text)
1764 else: # write operation
1764 else: # write operation
1765 text, vhash = writetransform(self, text)
1765 text, vhash = writetransform(self, text)
1766 validatehash = validatehash and vhash
1766 validatehash = validatehash and vhash
1767
1767
1768 return text, validatehash
1768 return text, validatehash
1769
1769
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.

        Raises RevlogError on mismatch, or CensoredNodeError when the text
        turns out to be a censor tombstone in a censorable revlog.
        """
        try:
            if p1 is None and p2 is None:
                # parents not supplied: look them up from the index
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                # prefer the revision number in the message; fall back to a
                # short node hash when no rev was provided
                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            # a "mismatch" on censored content is expected: re-raise it as the
            # more specific CensoredNodeError for callers that handle censoring
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise
1798
1798
    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        # nothing to do if not inline, or still under the inline size limit
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        # register the new standalone data file with the transaction so the
        # split can be rolled back
        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        # copy every revision's data chunk out of the inline index file into
        # the new dedicated data file
        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        # rewrite the index without the inline flag and without inline data
        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replace the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        # cached raw chunks reference inline offsets; they are now invalid
        self._chunkclear()
1850
1850
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.
        """
        # Intentionally a no-op hook; presumably subclasses/extensions
        # override it to react to duplicate additions — TODO confirm callers.
1854
1854
    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls

        Returns the node of the (possibly pre-existing) revision.
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            # with flags set, the node must be computed from the pre-transform
            # text (flag processors may rewrite it below)
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            # revision already stored; nothing to add
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)
1900
1900
1901 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1901 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1902 cachedelta=None, deltacomputer=None):
1902 cachedelta=None, deltacomputer=None):
1903 """add a raw revision with known flags, node and parents
1903 """add a raw revision with known flags, node and parents
1904 useful when reusing a revision not stored in this revlog (ex: received
1904 useful when reusing a revision not stored in this revlog (ex: received
1905 over wire, or read from an external bundle).
1905 over wire, or read from an external bundle).
1906 """
1906 """
1907 dfh = None
1907 dfh = None
1908 if not self._inline:
1908 if not self._inline:
1909 dfh = self._datafp("a+")
1909 dfh = self._datafp("a+")
1910 ifh = self._indexfp("a+")
1910 ifh = self._indexfp("a+")
1911 try:
1911 try:
1912 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1912 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1913 flags, cachedelta, ifh, dfh,
1913 flags, cachedelta, ifh, dfh,
1914 deltacomputer=deltacomputer)
1914 deltacomputer=deltacomputer)
1915 finally:
1915 finally:
1916 if dfh:
1916 if dfh:
1917 dfh.close()
1917 dfh.close()
1918 ifh.close()
1918 ifh.close()
1919
1919
1920 def compress(self, data):
1920 def compress(self, data):
1921 """Generate a possibly-compressed representation of data."""
1921 """Generate a possibly-compressed representation of data."""
1922 if not data:
1922 if not data:
1923 return '', data
1923 return '', data
1924
1924
1925 compressed = self._compressor.compress(data)
1925 compressed = self._compressor.compress(data)
1926
1926
1927 if compressed:
1927 if compressed:
1928 # The revlog compressor added the header in the returned data.
1928 # The revlog compressor added the header in the returned data.
1929 return '', compressed
1929 return '', compressed
1930
1930
1931 if data[0:1] == '\0':
1931 if data[0:1] == '\0':
1932 return '', data
1932 return '', data
1933 return 'u', data
1933 return 'u', data
1934
1934
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.

        Recognized headers (handled below): empty chunk (returned as-is),
        ``'x'`` zlib, ``'\\0'`` raw data, ``'u'`` raw data with marker byte,
        anything else is looked up as a compression-engine revlog header.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            # skip the one-byte 'u' marker without copying the payload
            return util.buffer(data, 1)

        # unknown header byte: resolve (and memoize) a compression engine
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)
1990
1990
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        # The null and working-directory identifiers are sentinels and must
        # never be stored as real revisions.
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        # Pick the file handle that revision chunks are read from: inline
        # revlogs keep data in the index file, others in a separate data file.
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # Wrapped in a list so the delta computer can fill in the built text
        # lazily (see buildtext below).
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # New index entry: (packed offset+flags, compressed len, raw len,
        # delta base rev, linkrev, parent revs, node).
        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        rawtext = btext[0]

        # Callers that will immediately read the revision back (see addgroup)
        # ask us to materialize and cache the full text now.
        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2067
2067
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Write one packed index ``entry`` plus its revision ``data`` (a
        # (header, body) pair) through the index/data handles, registering the
        # write positions with ``transaction`` for rollback.
        #
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            # Separate data file: data goes to dfh, index entry to ifh.
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            # Inline revlog: both the entry and the data live in the index
            # file, so account for the interleaved entries in the offset.
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # May migrate the revlog out of inline mode if it grew too large.
            self._enforceinlinesize(transaction, ifh)
2100
2100
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        # Closure handed to _peek_iscensored so pending writes are visible
        # before it reads back from the revlog.
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

            if dfh:
                dfh.close()
            ifh.close()

        return nodes
2208
2208
2209 def iscensored(self, rev):
2209 def iscensored(self, rev):
2210 """Check if a file revision is censored."""
2210 """Check if a file revision is censored."""
2211 if not self._censorable:
2211 if not self._censorable:
2212 return False
2212 return False
2213
2213
2214 return self.flags(rev) & REVIDX_ISCENSORED
2214 return self.flags(rev) & REVIDX_ISCENSORED
2215
2215
2216 def _peek_iscensored(self, baserev, delta, flush):
2216 def _peek_iscensored(self, baserev, delta, flush):
2217 """Quickly check if a delta produces a censored revision."""
2217 """Quickly check if a delta produces a censored revision."""
2218 if not self._censorable:
2218 if not self._censorable:
2219 return False
2219 return False
2220
2220
2221 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2221 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2222
2222
    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        # Delegates to the shared storage helper, feeding it this revlog's
        # heads, linkrev and parent lookup functions.
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)
2232
2232
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        # Nothing to strip: the first affected rev is past the end.
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            # Index file only holds fixed-size entries.
            end = rev * self._io.size
        else:
            # Inline: index entries and data are interleaved in one file.
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None
2273
2273
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        # Expected data size is the end offset of the last revision.
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            # A missing data file is treated as no excess data.
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            # Trailing bytes beyond a whole number of index entries.
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                # Inline: the index file also carries the revision data, so
                # recompute both excesses against entries + data chunks.
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
2309
2317
2310 def files(self):
2318 def files(self):
2311 res = [self.indexfile]
2319 res = [self.indexfile]
2312 if not self._inline:
2320 if not self._inline:
2313 res.append(self.datafile)
2321 res.append(self.datafile)
2314 return res
2322 return res
2315
2323
    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        """Emit revision data for the requested ``nodes``.

        Validates ``nodesorder`` and normalizes the ordering/delta mode for
        this revlog's capabilities before delegating to the shared
        ``storageutil.emitrevisions`` implementation.
        """
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        # Without generaldelta, storage order is the only sensible default.
        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        # A revlog that does not store delta chains cannot honor delta
        # requests (other than prev-mode), so fall back to full texts.
        if (not self._storedeltachains and
            deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)
2340
2348
    # Delta reuse policies accepted by ``clone()``; see the ``deltareuse``
    # argument of ``clone()`` for their exact semantics.
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    # Set of every recognized ``deltareuse`` value, used for validation.
    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2348
2356
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            # Translate the requested policy into the destination's delta
            # reuse knobs; restored in the finally block below.
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)


                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            # Always restore the destination's original delta configuration.
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2470
2478
def censorrevision(self, tr, censornode, tombstone=b''):
    """Replace the content of ``censornode`` with a censor tombstone.

    Because rewriting a revlog in place is hard, the whole revlog is
    copied into ``.tmpcensored`` siblings with the censored revision
    replaced, and the copies are renamed over the originals; backups are
    registered with the transaction ``tr`` so the operation rolls back
    cleanly.

    Raises ``error.RevlogError`` for version 0 revlogs (which cannot
    carry the censored flag) and ``error.Abort`` when the tombstone does
    not fit in the censored slot or delta storage prevents censoring.
    """
    # Version 0 revlogs have no flag support, so censoring is impossible.
    if (self.version & 0xFFFF) == REVLOGV0:
        raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                self.version)

    censorrev = self.rev(censornode)
    tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

    # The packed tombstone must fit in the space of the data it replaces.
    if len(tombstone) > self.rawsize(censorrev):
        raise error.Abort(_('censor tombstone must be no longer than '
                            'censored data'))

    # Rewriting the revlog in place is hard. Our strategy for censoring is
    # to create a new revlog, copy all revisions to it, then replace the
    # revlogs on transaction close.
    newindexfile = self.indexfile + b'.tmpcensored'
    newdatafile = self.datafile + b'.tmpcensored'

    # This is a bit dangerous. We could easily have a mismatch of state.
    newrl = revlog(self.opener, newindexfile, newdatafile,
                   censorable=True)
    newrl.version = self.version
    newrl._generaldelta = self._generaldelta
    newrl._io = self._io

    for rev in self.revs():
        node = self.node(rev)
        p1, p2 = self.parents(node)

        if rev == censorrev:
            # Store the tombstone in place of the censored revision.
            newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                 p1, p2, censornode, REVIDX_ISCENSORED)

            # A censored revision must be stored as a full snapshot;
            # delta storage would leak (or corrupt) the censored data.
            if newrl.deltaparent(rev) != nullrev:
                raise error.Abort(_('censored revision stored as delta; '
                                    'cannot censor'),
                                  hint=_('censoring of revlogs is not '
                                         'fully implemented; please report '
                                         'this bug'))
            continue

        if self.iscensored(rev):
            # Previously-censored revisions are copied verbatim; if one
            # is delta-stored we cannot safely reproduce it.
            if self.deltaparent(rev) != nullrev:
                raise error.Abort(_('cannot censor due to censored '
                                    'revision having delta stored'))
            rawtext = self._chunk(rev)
        else:
            rawtext = self.revision(rev, raw=True)

        newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                             self.flags(rev))

    # Register backups so the transaction can restore the originals.
    tr.addbackup(self.indexfile, location='store')
    if not self._inline:
        tr.addbackup(self.datafile, location='store')

    # Swap the rewritten files into place.
    self.opener.rename(newrl.indexfile, self.indexfile)
    if not self._inline:
        self.opener.rename(newrl.datafile, self.datafile)

    self.clearcaches()
    self._loadindex()
def verifyintegrity(self, state):
    """Verifies the integrity of the revlog.

    Yields ``revlogproblem`` instances describing problems that are
    found.
    """
    dd, di = self.checksize()
    if dd:
        yield revlogproblem(error=_('data length off by %d bytes') % dd)
    if di:
        yield revlogproblem(error=_('index contains %d extra bytes') % di)

    version = self.version & 0xFFFF

    # The verifier tells us what version revlog we should be.
    if version != state['expectedversion']:
        yield revlogproblem(
            warning=_("warning: '%s' uses revlog format %d; expected %d") %
                    (self.indexfile, version, state['expectedversion']))

    state['skipread'] = set()

    for rev in self:
        node = self.node(rev)

        # Verify contents. 4 cases to care about:
        #
        #   common: the most common case
        #   rename: with a rename
        #   meta:   file content starts with b'\1\n', the metadata
        #           header defined in filelog.py, but without a rename
        #   ext:    content stored externally
        #
        # More formally, their differences are shown below:
        #
        #                       | common | rename | meta  | ext
        #  -------------------------------------------------------
        #  flags()              | 0      | 0      | 0     | not 0
        #  renamed()            | False  | True   | False | ?
        #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
        #
        # "rawtext" means the raw text stored in revlog data, which
        # could be retrieved by "revision(rev, raw=True)". "text"
        # mentioned below is "revision(rev, raw=False)".
        #
        # There are 3 different lengths stored physically:
        #  1. L1: rawsize, stored in revlog index
        #  2. L2: len(rawtext), stored in revlog data
        #  3. L3: len(text), stored in revlog data if flags==0, or
        #     possibly somewhere else if flags!=0
        #
        # L1 should be equal to L2. L3 could be different from them.
        # "text" may or may not affect commit hash depending on flag
        # processors (see revlog.addflagprocessor).
        #
        #              | common  | rename | meta  | ext
        # -------------------------------------------------
        # rawsize()    | L1      | L1     | L1    | L1
        # size()       | L1      | L2-LM  | L1(*) | L1 (?)
        # len(rawtext) | L2      | L2     | L2    | L2
        # len(text)    | L2      | L2     | L2    | L3
        # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
        #
        # LM:  length of metadata, depending on rawtext
        # (*): not ideal, see comment in filelog.size
        # (?): could be "- len(meta)" if the resolved content has
        #      rename metadata
        #
        # Checks needed to be done:
        #  1. length check: L1 == L2, in all cases.
        #  2. hash check: depending on flag processor, we may need to
        #     use either "text" (external), or "rawtext" (in revlog).
        try:
            skipflags = state.get('skipflags', 0)
            if skipflags:
                # Only skip when this revision actually carries one of
                # the flags the caller asked to skip.
                skipflags &= self.flags(rev)

            if skipflags:
                state['skipread'].add(node)
            else:
                # Side-effect: read content and verify hash.
                self.revision(node)

            l1 = self.rawsize(rev)
            l2 = len(self.revision(node, raw=True))

            if l1 != l2:
                yield revlogproblem(
                    error=_('unpacked size is %d, %d expected') % (l2, l1),
                    node=node)

        except error.CensoredNodeError:
            # Censored content is only a problem when the caller says so.
            if state['erroroncensored']:
                yield revlogproblem(error=_('censored file data'),
                                    node=node)
            state['skipread'].add(node)
        except Exception as e:
            # Any other failure to unpack is reported, and downstream
            # readers are told to skip this node.
            yield revlogproblem(
                error=_('unpacking %s: %s') % (short(node),
                                               stringutil.forcebytestr(e)),
                node=node)
            state['skipread'].add(node)
def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                revisionscount=False, trackedsize=False,
                storedsize=False):
    """Return a dict describing this revlog's storage.

    Each keyword flag requests one facet; only requested facets appear
    as keys in the returned dict, so the default call returns ``{}``.
    """
    info = {}

    if exclusivefiles:
        # The index file always belongs exclusively to this revlog; a
        # separate data file exists only when the revlog is not inline.
        entries = [(self.opener, self.indexfile)]
        if not self._inline:
            entries.append((self.opener, self.datafile))
        info['exclusivefiles'] = entries

    if sharedfiles:
        # Revlogs share no storage files with other tracked entities.
        info['sharedfiles'] = []

    if revisionscount:
        info['revisionscount'] = len(self)

    if trackedsize:
        # Sum of the raw (uncompressed) sizes of every revision.
        info['trackedsize'] = sum(self.rawsize(rev) for rev in self)

    if storedsize:
        # On-disk footprint: actual size of every backing file.
        info['storedsize'] = sum(self.opener.stat(path).st_size
                                 for path in self.files())

    return info
General Comments 0
You need to be logged in to leave comments. Login now