sidedata: add a function to read sidedata from revlog raw text...
marmoute
r43302:294afb98 default

--- a/mercurial/error.py
+++ b/mercurial/error.py
@@ -1,336 +1,343 @@
# error.py - Mercurial exceptions
#
# Copyright 2005-2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial exceptions.

This allows us to catch exceptions at higher levels without forcing
imports.
"""

from __future__ import absolute_import

# Do not import anything but pycompat here, please
from . import pycompat

def _tobytes(exc):
    """Byte-stringify exception in the same way as BaseException_str()"""
    if not exc.args:
        return b''
    if len(exc.args) == 1:
        return pycompat.bytestr(exc.args[0])
    return b'(%s)' % b', '.join(b"'%s'" % pycompat.bytestr(a) for a in exc.args)

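# Illustrative sketch (not part of error.py or of this change): how
# _tobytes() renders exception args, assuming pycompat.bytestr() coerces
# str/bytes/int values to bytes.
#
#     class DemoError(Exception):
#         __bytes__ = _tobytes
#
#     bytes(DemoError())                    # => b''
#     bytes(DemoError(b'single message'))   # => b'single message'
#     bytes(DemoError(b'code', 42))         # => b"('code', '42')"
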
class Hint(object):
    """Mix-in to provide a hint of an error

    This should come first in the inheritance list to consume a hint and
    pass remaining arguments to the exception class.
    """
    def __init__(self, *args, **kw):
        self.hint = kw.pop(r'hint', None)
        super(Hint, self).__init__(*args, **kw)

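# Illustrative sketch (not part of the file): the Hint mix-in pops the
# 'hint' keyword before the concrete exception class sees the arguments,
# which is why it must come first in the inheritance list. Using Abort
# (defined later in this file):
#
#     try:
#         raise Abort(b'push failed', hint=b'see "hg help urls"')
#     except Abort as exc:
#         assert exc.hint == b'see "hg help urls"'
#         assert exc.args == (b'push failed',)
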
class StorageError(Hint, Exception):
    """Raised when an error occurs in a storage layer.

    Usually subclassed by a storage-specific exception.
    """
    __bytes__ = _tobytes

class RevlogError(StorageError):
    __bytes__ = _tobytes

+class SidedataHashError(RevlogError):
+
+    def __init__(self, key, expected, got):
+        self.sidedatakey = key
+        self.expecteddigest = expected
+        self.actualdigest = got
+
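# Usage sketch (illustrative only, not part of error.py or of this change):
# a caller validating sidedata digests could raise and report the new
# exception like this; the names 'key', 'expected' and 'got' are assumptions.
#
#     try:
#         if got != expected:
#             raise SidedataHashError(key, expected, got)
#     except SidedataHashError as exc:
#         print(exc.sidedatakey, exc.expecteddigest, exc.actualdigest)
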
class FilteredIndexError(IndexError):
    __bytes__ = _tobytes

class LookupError(RevlogError, KeyError):
    def __init__(self, name, index, message):
        self.name = name
        self.index = index
        # this can't be called 'message' because at least some installs of
        # Python 2.6+ complain about the 'message' property being deprecated
        self.lookupmessage = message
        if isinstance(name, bytes) and len(name) == 20:
            from .node import short
            name = short(name)
        RevlogError.__init__(self, '%s@%s: %s' % (index, name, message))

    def __bytes__(self):
        return RevlogError.__bytes__(self)

    def __str__(self):
        return RevlogError.__str__(self)

class AmbiguousPrefixLookupError(LookupError):
    pass

class FilteredLookupError(LookupError):
    pass

class ManifestLookupError(LookupError):
    pass

class CommandError(Exception):
    """Exception raised on errors in parsing the command line."""
    __bytes__ = _tobytes

class InterventionRequired(Hint, Exception):
    """Exception raised when a command requires human intervention."""
    __bytes__ = _tobytes

class Abort(Hint, Exception):
    """Raised if a command needs to print an error and exit."""
    __bytes__ = _tobytes

class HookLoadError(Abort):
    """raised when loading a hook fails, aborting an operation

    Exists to allow more specialized catching."""

class HookAbort(Abort):
    """raised when a validation hook fails, aborting an operation

    Exists to allow more specialized catching."""

class ConfigError(Abort):
    """Exception raised when parsing config files"""

class UpdateAbort(Abort):
    """Raised when an update is aborted for destination issue"""

class MergeDestAbort(Abort):
    """Raised when an update is aborted for destination issues"""

class NoMergeDestAbort(MergeDestAbort):
    """Raised when an update is aborted because there is nothing to merge"""

class ManyMergeDestAbort(MergeDestAbort):
    """Raised when an update is aborted because destination is ambiguous"""

class ResponseExpected(Abort):
    """Raised when an EOF is received for a prompt"""
    def __init__(self):
        from .i18n import _
        Abort.__init__(self, _('response expected'))

class OutOfBandError(Hint, Exception):
    """Exception raised when a remote repo reports failure"""
    __bytes__ = _tobytes

class ParseError(Hint, Exception):
    """Raised when parsing config files and {rev,file}sets (msg[, pos])"""
    __bytes__ = _tobytes

class PatchError(Exception):
    __bytes__ = _tobytes

class UnknownIdentifier(ParseError):
    """Exception raised when a {rev,file}set references an unknown identifier"""

    def __init__(self, function, symbols):
        from .i18n import _
        ParseError.__init__(self, _("unknown identifier: %s") % function)
        self.function = function
        self.symbols = symbols

class RepoError(Hint, Exception):
    __bytes__ = _tobytes

class RepoLookupError(RepoError):
    pass

class FilteredRepoLookupError(RepoLookupError):
    pass

class CapabilityError(RepoError):
    pass

class RequirementError(RepoError):
    """Exception raised if .hg/requires has an unknown entry."""

class StdioError(IOError):
    """Raised if I/O to stdout or stderr fails"""

    def __init__(self, err):
        IOError.__init__(self, err.errno, err.strerror)

    # no __bytes__() because error message is derived from the standard IOError

class UnsupportedMergeRecords(Abort):
    def __init__(self, recordtypes):
        from .i18n import _
        self.recordtypes = sorted(recordtypes)
        s = ' '.join(self.recordtypes)
        Abort.__init__(
            self, _('unsupported merge state records: %s') % s,
            hint=_('see https://mercurial-scm.org/wiki/MergeStateRecords for '
                   'more information'))

class UnknownVersion(Abort):
    """generic exception for aborting from an encounter with an unknown version
    """

    def __init__(self, msg, hint=None, version=None):
        self.version = version
        super(UnknownVersion, self).__init__(msg, hint=hint)

class LockError(IOError):
    def __init__(self, errno, strerror, filename, desc):
        IOError.__init__(self, errno, strerror, filename)
        self.desc = desc

    # no __bytes__() because error message is derived from the standard IOError

class LockHeld(LockError):
    def __init__(self, errno, filename, desc, locker):
        LockError.__init__(self, errno, 'Lock held', filename, desc)
        self.locker = locker

class LockUnavailable(LockError):
    pass

# LockError is for errors while acquiring the lock -- this is unrelated
class LockInheritanceContractViolation(RuntimeError):
    __bytes__ = _tobytes

class ResponseError(Exception):
    """Raised to print an error with part of output and exit."""
    __bytes__ = _tobytes

class UnknownCommand(Exception):
    """Exception raised if command is not in the command table."""
    __bytes__ = _tobytes

class AmbiguousCommand(Exception):
    """Exception raised if command shortcut matches more than one command."""
    __bytes__ = _tobytes

# derived from KeyboardInterrupt to simplify some breakout code
class SignalInterrupt(KeyboardInterrupt):
    """Exception raised on SIGTERM and SIGHUP."""

class SignatureError(Exception):
    __bytes__ = _tobytes

class PushRaced(RuntimeError):
    """An exception raised during unbundling that indicate a push race"""
    __bytes__ = _tobytes

class ProgrammingError(Hint, RuntimeError):
    """Raised if a mercurial (core or extension) developer made a mistake"""

    def __init__(self, msg, *args, **kwargs):
        # On Python 3, turn the message back into a string since this is
        # an internal-only error that won't be printed except in a
        # stack traces.
        msg = pycompat.sysstr(msg)
        super(ProgrammingError, self).__init__(msg, *args, **kwargs)

    __bytes__ = _tobytes

class WdirUnsupported(Exception):
    """An exception which is raised when 'wdir()' is not supported"""
    __bytes__ = _tobytes

# bundle2 related errors
class BundleValueError(ValueError):
    """error raised when bundle2 cannot be processed"""
    __bytes__ = _tobytes

class BundleUnknownFeatureError(BundleValueError):
    def __init__(self, parttype=None, params=(), values=()):
        self.parttype = parttype
        self.params = params
        self.values = values
        if self.parttype is None:
            msg = 'Stream Parameter'
        else:
            msg = parttype
        entries = self.params
        if self.params and self.values:
            assert len(self.params) == len(self.values)
            entries = []
            for idx, par in enumerate(self.params):
                val = self.values[idx]
                if val is None:
                    entries.append(val)
                else:
                    entries.append("%s=%r" % (par, pycompat.maybebytestr(val)))
        if entries:
            msg = '%s - %s' % (msg, ', '.join(entries))
        ValueError.__init__(self, msg)

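# Illustrative sketch (not part of the file): the message built above joins
# "param=value" pairs after the part type, so an unsupported part would
# stringify roughly as follows (the part and parameter names are made up).
#
#     exc = BundleUnknownFeatureError(parttype='obsmarkers',
#                                     params=('version',), values=('2',))
#     str(exc)   # => "obsmarkers - version='2'"
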
class ReadOnlyPartError(RuntimeError):
    """error raised when code tries to alter a part being generated"""
    __bytes__ = _tobytes

class PushkeyFailed(Abort):
    """error raised when a pushkey part failed to update a value"""

    def __init__(self, partid, namespace=None, key=None, new=None, old=None,
                 ret=None):
        self.partid = partid
        self.namespace = namespace
        self.key = key
        self.new = new
        self.old = old
        self.ret = ret
        # no i18n expected to be processed into a better message
        Abort.__init__(self, 'failed to update value for "%s/%s"'
                       % (namespace, key))

class CensoredNodeError(StorageError):
    """error raised when content verification fails on a censored node

    Also contains the tombstone data substituted for the uncensored data.
    """

    def __init__(self, filename, node, tombstone):
        from .node import short
        StorageError.__init__(self, '%s:%s' % (filename, short(node)))
        self.tombstone = tombstone

class CensoredBaseError(StorageError):
    """error raised when a delta is rejected because its base is censored

    A delta based on a censored revision must be formed as single patch
    operation which replaces the entire base with new content. This ensures
    the delta may be applied by clones which have not censored the base.
    """

class InvalidBundleSpecification(Exception):
    """error raised when a bundle specification is invalid.

    This is used for syntax errors as opposed to support errors.
    """
    __bytes__ = _tobytes

class UnsupportedBundleSpecification(Exception):
    """error raised when a bundle specification is not supported."""
    __bytes__ = _tobytes

class CorruptedState(Exception):
    """error raised when a command is not able to read its state from file"""
    __bytes__ = _tobytes

class PeerTransportError(Abort):
    """Transport-level I/O error when communicating with a peer repo."""

class InMemoryMergeConflictsError(Exception):
    """Exception raised when merge conflicts arose during an in-memory merge."""
    __bytes__ = _tobytes

class WireprotoCommandError(Exception):
    """Represents an error during execution of a wire protocol command.

    Should only be thrown by wire protocol version 2 commands.

    The error is a formatter string and an optional iterable of arguments.
    """
    def __init__(self, message, args=None):
        self.message = message
        self.messageargs = args

--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2665 +1,2671 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the name to prevent pyflakes constraints
# We need these name available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}

def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

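# Illustrative sketch (not part of revlog.py): flag processors are registered
# as a (read, write, raw) triple like ellipsisprocessor above. Mirroring the
# signatures of the ellipsis processors, a minimal processor for some
# hypothetical flag could look like this (the exact meaning of the returned
# booleans follows flagutil's conventions and is assumed here):
#
#     def demoread(rl, text):
#         return text, False, {}
#
#     def demowrite(rl, text, sidedata):
#         return text, False
#
#     def demoraw(rl, text):
#         return False
#
#     demoprocessor = (demoread, demowrite, demoraw)
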
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

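# Worked example (illustrative, not part of revlog.py): the first field of an
# index entry packs the data-file offset into the bits above 16 and the
# per-revision flags into the low 16 bits, and the two helpers above split it
# back apart.
#
#     packed = offset_type(1024, REVIDX_ISCENSORED)
#     assert getoffset(packed) == 1024
#     assert gettype(packed) == REVIDX_ISCENSORED
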
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

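# Worked example (illustrative, not part of revlog.py): each v0 index entry
# is a fixed 76-byte record (4*4 + 3*20 bytes), so entry n starts at byte
# n * indexformatv0.size of the index data.
#
#     assert indexformatv0.size == 76
#     offset, clen, base, link, p1, p2, node = indexformatv0_unpack(
#         data[0:indexformatv0.size])   # 'data' is the raw index bytes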
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

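# Worked example (illustrative, not part of revlog.py): a v1 ("ng") index
# entry is a fixed 64-byte record (8 + 6*4 + 20 + 12 padding bytes), and
# packentry() below overwrites the first 4 bytes of entry 0 with the revlog
# version/flags header.
#
#     assert indexformatng.size == 64
#     (offset_flags, clen, ulen, base,
#      link, p1, p2, node) = indexformatng.unpack(entry_bytes)
#     # 'entry_bytes' stands for one 64-byte record of the index
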
245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 # signed integer)
246 # signed integer)
247 _maxentrysize = 0x7fffffff
247 _maxentrysize = 0x7fffffff
248
248
249 class revlogio(object):
249 class revlogio(object):
250 def __init__(self):
250 def __init__(self):
251 self.size = indexformatng.size
251 self.size = indexformatng.size
252
252
253 def parseindex(self, data, inline):
253 def parseindex(self, data, inline):
254 # call the C implementation to parse the index data
254 # call the C implementation to parse the index data
255 index, cache = parsers.parse_index2(data, inline)
255 index, cache = parsers.parse_index2(data, inline)
256 return index, getattr(index, 'nodemap', None), cache
256 return index, getattr(index, 'nodemap', None), cache
257
257
258 def packentry(self, entry, node, version, rev):
258 def packentry(self, entry, node, version, rev):
259 p = indexformatng_pack(*entry)
259 p = indexformatng_pack(*entry)
260 if rev == 0:
260 if rev == 0:
261 p = versionformat_pack(version) + p[4:]
261 p = versionformat_pack(version) + p[4:]
262 return p
262 return p
263
263
264 class revlog(object):
264 class revlog(object):
265 """
265 """
266 the underlying revision storage object
266 the underlying revision storage object
267
267
268 A revlog consists of two parts, an index and the revision data.
268 A revlog consists of two parts, an index and the revision data.
269
269
270 The index is a file with a fixed record size containing
270 The index is a file with a fixed record size containing
271 information on each revision, including its nodeid (hash), the
271 information on each revision, including its nodeid (hash), the
272 nodeids of its parents, the position and offset of its data within
272 nodeids of its parents, the position and offset of its data within
273 the data file, and the revision it's based on. Finally, each entry
273 the data file, and the revision it's based on. Finally, each entry
274 contains a linkrev entry that can serve as a pointer to external
274 contains a linkrev entry that can serve as a pointer to external
275 data.
275 data.
276
276
277 The revision data itself is a linear collection of data chunks.
277 The revision data itself is a linear collection of data chunks.
278 Each chunk represents a revision and is usually represented as a
278 Each chunk represents a revision and is usually represented as a
279 delta against the previous chunk. To bound lookup time, runs of
279 delta against the previous chunk. To bound lookup time, runs of
280 deltas are limited to about 2 times the length of the original
280 deltas are limited to about 2 times the length of the original
281 version data. This makes retrieval of a version proportional to
281 version data. This makes retrieval of a version proportional to
282 its size, or O(1) relative to the number of revisions.
282 its size, or O(1) relative to the number of revisions.
283
283
284 Both pieces of the revlog are written to in an append-only
284 Both pieces of the revlog are written to in an append-only
285 fashion, which means we never need to rewrite a file to insert or
285 fashion, which means we never need to rewrite a file to insert or
286 remove data, and can use some simple techniques to avoid the need
286 remove data, and can use some simple techniques to avoid the need
287 for locking while reading.
287 for locking while reading.
288
288
289 If checkambig, indexfile is opened with checkambig=True at
289 If checkambig, indexfile is opened with checkambig=True at
290 writing, to avoid file stat ambiguity.
290 writing, to avoid file stat ambiguity.
291
291
292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 index will be mmapped rather than read if it is larger than the
293 index will be mmapped rather than read if it is larger than the
294 configured threshold.
294 configured threshold.
295
295
296 If censorable is True, the revlog can have censored revisions.
296 If censorable is True, the revlog can have censored revisions.
297
297
298 If `upperboundcomp` is not None, this is the expected maximal gain from
298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 compression for the data content.
299 compression for the data content.
300 """
300 """
301
301
302 _flagserrorclass = error.RevlogError
302 _flagserrorclass = error.RevlogError
303
303
304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 mmaplargeindex=False, censorable=False,
305 mmaplargeindex=False, censorable=False,
306 upperboundcomp=None):
306 upperboundcomp=None):
307 """
307 """
308 create a revlog object
308 create a revlog object
309
309
310 opener is a function that abstracts the file opening operation
310 opener is a function that abstracts the file opening operation
311 and can be used to implement COW semantics or the like.
311 and can be used to implement COW semantics or the like.
312
312
313 """
313 """
314 self.upperboundcomp = upperboundcomp
314 self.upperboundcomp = upperboundcomp
315 self.indexfile = indexfile
315 self.indexfile = indexfile
316 self.datafile = datafile or (indexfile[:-2] + ".d")
316 self.datafile = datafile or (indexfile[:-2] + ".d")
317 self.opener = opener
317 self.opener = opener
318 # When True, indexfile is opened with checkambig=True at writing, to
318 # When True, indexfile is opened with checkambig=True at writing, to
319 # avoid file stat ambiguity.
319 # avoid file stat ambiguity.
320 self._checkambig = checkambig
320 self._checkambig = checkambig
321 self._mmaplargeindex = mmaplargeindex
321 self._mmaplargeindex = mmaplargeindex
322 self._censorable = censorable
322 self._censorable = censorable
323 # 3-tuple of (node, rev, text) for a raw revision.
323 # 3-tuple of (node, rev, text) for a raw revision.
324 self._revisioncache = None
324 self._revisioncache = None
325 # Maps rev to chain base rev.
325 # Maps rev to chain base rev.
326 self._chainbasecache = util.lrucachedict(100)
326 self._chainbasecache = util.lrucachedict(100)
327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 self._chunkcache = (0, '')
328 self._chunkcache = (0, '')
329 # How much data to read and cache into the raw revlog data cache.
329 # How much data to read and cache into the raw revlog data cache.
330 self._chunkcachesize = 65536
330 self._chunkcachesize = 65536
331 self._maxchainlen = None
331 self._maxchainlen = None
332 self._deltabothparents = True
332 self._deltabothparents = True
333 self.index = []
333 self.index = []
334 # Mapping of partial identifiers to full nodes.
334 # Mapping of partial identifiers to full nodes.
335 self._pcache = {}
335 self._pcache = {}
336 # Mapping of revision integer to full node.
336 # Mapping of revision integer to full node.
337 self._nodecache = {nullid: nullrev}
337 self._nodecache = {nullid: nullrev}
338 self._nodepos = None
338 self._nodepos = None
339 self._compengine = 'zlib'
339 self._compengine = 'zlib'
340 self._compengineopts = {}
340 self._compengineopts = {}
341 self._maxdeltachainspan = -1
341 self._maxdeltachainspan = -1
342 self._withsparseread = False
342 self._withsparseread = False
343 self._sparserevlog = False
343 self._sparserevlog = False
344 self._srdensitythreshold = 0.50
344 self._srdensitythreshold = 0.50
345 self._srmingapsize = 262144
345 self._srmingapsize = 262144
346
346
347 # Make copy of flag processors so each revlog instance can support
347 # Make copy of flag processors so each revlog instance can support
348 # custom flags.
348 # custom flags.
349 self._flagprocessors = dict(flagutil.flagprocessors)
349 self._flagprocessors = dict(flagutil.flagprocessors)
350
350
351 # 2-tuple of file handles being used for active writing.
351 # 2-tuple of file handles being used for active writing.
352 self._writinghandles = None
352 self._writinghandles = None
353
353
354 self._loadindex()
354 self._loadindex()
355
355
356 def _loadindex(self):
356 def _loadindex(self):
357 mmapindexthreshold = None
357 mmapindexthreshold = None
358 opts = self.opener.options
358 opts = self.opener.options
359
359
360 if 'revlogv2' in opts:
360 if 'revlogv2' in opts:
361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 elif 'revlogv1' in opts:
362 elif 'revlogv1' in opts:
363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 if 'generaldelta' in opts:
364 if 'generaldelta' in opts:
365 newversionflags |= FLAG_GENERALDELTA
365 newversionflags |= FLAG_GENERALDELTA
366 elif 'revlogv0' in self.opener.options:
366 elif 'revlogv0' in self.opener.options:
367 newversionflags = REVLOGV0
367 newversionflags = REVLOGV0
368 else:
368 else:
369 newversionflags = REVLOG_DEFAULT_VERSION
369 newversionflags = REVLOG_DEFAULT_VERSION
370
370
371 if 'chunkcachesize' in opts:
371 if 'chunkcachesize' in opts:
372 self._chunkcachesize = opts['chunkcachesize']
372 self._chunkcachesize = opts['chunkcachesize']
373 if 'maxchainlen' in opts:
373 if 'maxchainlen' in opts:
374 self._maxchainlen = opts['maxchainlen']
374 self._maxchainlen = opts['maxchainlen']
375 if 'deltabothparents' in opts:
375 if 'deltabothparents' in opts:
376 self._deltabothparents = opts['deltabothparents']
376 self._deltabothparents = opts['deltabothparents']
377 self._lazydelta = bool(opts.get('lazydelta', True))
377 self._lazydelta = bool(opts.get('lazydelta', True))
378 self._lazydeltabase = False
378 self._lazydeltabase = False
379 if self._lazydelta:
379 if self._lazydelta:
380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 if 'compengine' in opts:
381 if 'compengine' in opts:
382 self._compengine = opts['compengine']
382 self._compengine = opts['compengine']
383 if 'zlib.level' in opts:
383 if 'zlib.level' in opts:
384 self._compengineopts['zlib.level'] = opts['zlib.level']
384 self._compengineopts['zlib.level'] = opts['zlib.level']
385 if 'zstd.level' in opts:
385 if 'zstd.level' in opts:
386 self._compengineopts['zstd.level'] = opts['zstd.level']
386 self._compengineopts['zstd.level'] = opts['zstd.level']
387 if 'maxdeltachainspan' in opts:
387 if 'maxdeltachainspan' in opts:
388 self._maxdeltachainspan = opts['maxdeltachainspan']
388 self._maxdeltachainspan = opts['maxdeltachainspan']
389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 mmapindexthreshold = opts['mmapindexthreshold']
390 mmapindexthreshold = opts['mmapindexthreshold']
391 self.hassidedata = bool(opts.get('side-data', False))
391 self.hassidedata = bool(opts.get('side-data', False))
392 self._sparserevlog = bool(opts.get('sparse-revlog', False))
392 self._sparserevlog = bool(opts.get('sparse-revlog', False))
393 withsparseread = bool(opts.get('with-sparse-read', False))
393 withsparseread = bool(opts.get('with-sparse-read', False))
394 # sparse-revlog forces sparse-read
394 # sparse-revlog forces sparse-read
395 self._withsparseread = self._sparserevlog or withsparseread
395 self._withsparseread = self._sparserevlog or withsparseread
396 if 'sparse-read-density-threshold' in opts:
396 if 'sparse-read-density-threshold' in opts:
397 self._srdensitythreshold = opts['sparse-read-density-threshold']
397 self._srdensitythreshold = opts['sparse-read-density-threshold']
398 if 'sparse-read-min-gap-size' in opts:
398 if 'sparse-read-min-gap-size' in opts:
399 self._srmingapsize = opts['sparse-read-min-gap-size']
399 self._srmingapsize = opts['sparse-read-min-gap-size']
400 if opts.get('enableellipsis'):
400 if opts.get('enableellipsis'):
401 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
401 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
402
402
403 # revlog v0 doesn't have flag processors
403 # revlog v0 doesn't have flag processors
404 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
404 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
405 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
405 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
406
406
407 if self._chunkcachesize <= 0:
407 if self._chunkcachesize <= 0:
408 raise error.RevlogError(_('revlog chunk cache size %r is not '
408 raise error.RevlogError(_('revlog chunk cache size %r is not '
409 'greater than 0') % self._chunkcachesize)
409 'greater than 0') % self._chunkcachesize)
410 elif self._chunkcachesize & (self._chunkcachesize - 1):
410 elif self._chunkcachesize & (self._chunkcachesize - 1):
411 raise error.RevlogError(_('revlog chunk cache size %r is not a '
411 raise error.RevlogError(_('revlog chunk cache size %r is not a '
412 'power of 2') % self._chunkcachesize)
412 'power of 2') % self._chunkcachesize)
413
413
414 indexdata = ''
414 indexdata = ''
415 self._initempty = True
415 self._initempty = True
416 try:
416 try:
417 with self._indexfp() as f:
417 with self._indexfp() as f:
418 if (mmapindexthreshold is not None and
418 if (mmapindexthreshold is not None and
419 self.opener.fstat(f).st_size >= mmapindexthreshold):
419 self.opener.fstat(f).st_size >= mmapindexthreshold):
420 # TODO: should .close() to release resources without
420 # TODO: should .close() to release resources without
421 # relying on Python GC
421 # relying on Python GC
422 indexdata = util.buffer(util.mmapread(f))
422 indexdata = util.buffer(util.mmapread(f))
423 else:
423 else:
424 indexdata = f.read()
424 indexdata = f.read()
425 if len(indexdata) > 0:
425 if len(indexdata) > 0:
426 versionflags = versionformat_unpack(indexdata[:4])[0]
426 versionflags = versionformat_unpack(indexdata[:4])[0]
427 self._initempty = False
427 self._initempty = False
428 else:
428 else:
429 versionflags = newversionflags
429 versionflags = newversionflags
430 except IOError as inst:
430 except IOError as inst:
431 if inst.errno != errno.ENOENT:
431 if inst.errno != errno.ENOENT:
432 raise
432 raise
433
433
434 versionflags = newversionflags
434 versionflags = newversionflags
435
435
436 self.version = versionflags
436 self.version = versionflags
437
437
438 flags = versionflags & ~0xFFFF
438 flags = versionflags & ~0xFFFF
439 fmt = versionflags & 0xFFFF
439 fmt = versionflags & 0xFFFF
440
440
441 if fmt == REVLOGV0:
441 if fmt == REVLOGV0:
442 if flags:
442 if flags:
443 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
443 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 'revlog %s') %
444 'revlog %s') %
445 (flags >> 16, fmt, self.indexfile))
445 (flags >> 16, fmt, self.indexfile))
446
446
447 self._inline = False
447 self._inline = False
448 self._generaldelta = False
448 self._generaldelta = False
449
449
450 elif fmt == REVLOGV1:
450 elif fmt == REVLOGV1:
451 if flags & ~REVLOGV1_FLAGS:
451 if flags & ~REVLOGV1_FLAGS:
452 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
453 'revlog %s') %
453 'revlog %s') %
454 (flags >> 16, fmt, self.indexfile))
454 (flags >> 16, fmt, self.indexfile))
455
455
456 self._inline = versionflags & FLAG_INLINE_DATA
456 self._inline = versionflags & FLAG_INLINE_DATA
457 self._generaldelta = versionflags & FLAG_GENERALDELTA
457 self._generaldelta = versionflags & FLAG_GENERALDELTA
458
458
459 elif fmt == REVLOGV2:
459 elif fmt == REVLOGV2:
460 if flags & ~REVLOGV2_FLAGS:
460 if flags & ~REVLOGV2_FLAGS:
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 'revlog %s') %
462 'revlog %s') %
463 (flags >> 16, fmt, self.indexfile))
463 (flags >> 16, fmt, self.indexfile))
464
464
465 self._inline = versionflags & FLAG_INLINE_DATA
465 self._inline = versionflags & FLAG_INLINE_DATA
466 # generaldelta implied by version 2 revlogs.
466 # generaldelta implied by version 2 revlogs.
467 self._generaldelta = True
467 self._generaldelta = True
468
468
469 else:
469 else:
470 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
470 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
471 (fmt, self.indexfile))
471 (fmt, self.indexfile))
472 # sparse-revlog can't be on without general-delta (issue6056)
472 # sparse-revlog can't be on without general-delta (issue6056)
473 if not self._generaldelta:
473 if not self._generaldelta:
474 self._sparserevlog = False
474 self._sparserevlog = False
475
475
476 self._storedeltachains = True
476 self._storedeltachains = True
477
477
478 self._io = revlogio()
478 self._io = revlogio()
479 if self.version == REVLOGV0:
479 if self.version == REVLOGV0:
480 self._io = revlogoldio()
480 self._io = revlogoldio()
481 try:
481 try:
482 d = self._io.parseindex(indexdata, self._inline)
482 d = self._io.parseindex(indexdata, self._inline)
483 except (ValueError, IndexError):
483 except (ValueError, IndexError):
484 raise error.RevlogError(_("index %s is corrupted") %
484 raise error.RevlogError(_("index %s is corrupted") %
485 self.indexfile)
485 self.indexfile)
486 self.index, nodemap, self._chunkcache = d
486 self.index, nodemap, self._chunkcache = d
487 if nodemap is not None:
487 if nodemap is not None:
488 self.nodemap = self._nodecache = nodemap
488 self.nodemap = self._nodecache = nodemap
489 if not self._chunkcache:
489 if not self._chunkcache:
490 self._chunkclear()
490 self._chunkclear()
491 # revnum -> (chain-length, sum-delta-length)
491 # revnum -> (chain-length, sum-delta-length)
492 self._chaininfocache = {}
492 self._chaininfocache = {}
493 # revlog header -> revlog compressor
493 # revlog header -> revlog compressor
494 self._decompressors = {}
494 self._decompressors = {}
495
495
496 @util.propertycache
496 @util.propertycache
497 def _compressor(self):
497 def _compressor(self):
498 engine = util.compengines[self._compengine]
498 engine = util.compengines[self._compengine]
499 return engine.revlogcompressor(self._compengineopts)
499 return engine.revlogcompressor(self._compengineopts)
500
500
501 def _indexfp(self, mode='r'):
501 def _indexfp(self, mode='r'):
502 """file object for the revlog's index file"""
502 """file object for the revlog's index file"""
503 args = {r'mode': mode}
503 args = {r'mode': mode}
504 if mode != 'r':
504 if mode != 'r':
505 args[r'checkambig'] = self._checkambig
505 args[r'checkambig'] = self._checkambig
506 if mode == 'w':
506 if mode == 'w':
507 args[r'atomictemp'] = True
507 args[r'atomictemp'] = True
508 return self.opener(self.indexfile, **args)
508 return self.opener(self.indexfile, **args)
509
509
510 def _datafp(self, mode='r'):
510 def _datafp(self, mode='r'):
511 """file object for the revlog's data file"""
511 """file object for the revlog's data file"""
512 return self.opener(self.datafile, mode=mode)
512 return self.opener(self.datafile, mode=mode)
513
513
514 @contextlib.contextmanager
514 @contextlib.contextmanager
515 def _datareadfp(self, existingfp=None):
515 def _datareadfp(self, existingfp=None):
516 """file object suitable to read data"""
516 """file object suitable to read data"""
517 # Use explicit file handle, if given.
517 # Use explicit file handle, if given.
518 if existingfp is not None:
518 if existingfp is not None:
519 yield existingfp
519 yield existingfp
520
520
521 # Use a file handle being actively used for writes, if available.
521 # Use a file handle being actively used for writes, if available.
522 # There is some danger to doing this because reads will seek the
522 # There is some danger to doing this because reads will seek the
523 # file. However, _writeentry() performs a SEEK_END before all writes,
523 # file. However, _writeentry() performs a SEEK_END before all writes,
524 # so we should be safe.
524 # so we should be safe.
525 elif self._writinghandles:
525 elif self._writinghandles:
526 if self._inline:
526 if self._inline:
527 yield self._writinghandles[0]
527 yield self._writinghandles[0]
528 else:
528 else:
529 yield self._writinghandles[1]
529 yield self._writinghandles[1]
530
530
531 # Otherwise open a new file handle.
531 # Otherwise open a new file handle.
532 else:
532 else:
533 if self._inline:
533 if self._inline:
534 func = self._indexfp
534 func = self._indexfp
535 else:
535 else:
536 func = self._datafp
536 func = self._datafp
537 with func() as fp:
537 with func() as fp:
538 yield fp
538 yield fp
539
539
540 def tip(self):
540 def tip(self):
541 return self.node(len(self.index) - 1)
541 return self.node(len(self.index) - 1)
542 def __contains__(self, rev):
542 def __contains__(self, rev):
543 return 0 <= rev < len(self)
543 return 0 <= rev < len(self)
544 def __len__(self):
544 def __len__(self):
545 return len(self.index)
545 return len(self.index)
546 def __iter__(self):
546 def __iter__(self):
547 return iter(pycompat.xrange(len(self)))
547 return iter(pycompat.xrange(len(self)))
548 def revs(self, start=0, stop=None):
548 def revs(self, start=0, stop=None):
549 """iterate over all rev in this revlog (from start to stop)"""
549 """iterate over all rev in this revlog (from start to stop)"""
550 return storageutil.iterrevs(len(self), start=start, stop=stop)
550 return storageutil.iterrevs(len(self), start=start, stop=stop)
551
551
552 @util.propertycache
552 @util.propertycache
553 def nodemap(self):
553 def nodemap(self):
554 if self.index:
554 if self.index:
555 # populate mapping down to the initial node
555 # populate mapping down to the initial node
556 node0 = self.index[0][7] # get around changelog filtering
556 node0 = self.index[0][7] # get around changelog filtering
557 self.rev(node0)
557 self.rev(node0)
558 return self._nodecache
558 return self._nodecache
559
559
560 def hasnode(self, node):
560 def hasnode(self, node):
561 try:
561 try:
562 self.rev(node)
562 self.rev(node)
563 return True
563 return True
564 except KeyError:
564 except KeyError:
565 return False
565 return False
566
566
567 def candelta(self, baserev, rev):
567 def candelta(self, baserev, rev):
568 """whether two revisions (baserev, rev) can be delta-ed or not"""
568 """whether two revisions (baserev, rev) can be delta-ed or not"""
569 # Disable delta if either rev requires a content-changing flag
570 # processor (ex. LFS). This is because such a flag processor can alter
571 # the rawtext content that the delta will be based on, and two clients
572 # could have the same revlog node with different flags (i.e. different
573 # rawtext contents) and the delta could be incompatible.
574 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
574 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
575 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
575 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
576 return False
576 return False
577 return True
577 return True
578
578
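# A hedged sketch of a caller deciding whether ``candidaterev`` may serve as a
# delta base for ``rev``; the variable names are examples, not part of the
# original code:
#
#   if self.candelta(candidaterev, rev):
#       base = candidaterev      # safe to delta against this base
#   else:
#       base = nullrev           # fall back to a full text / another base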
579 def clearcaches(self):
579 def clearcaches(self):
580 self._revisioncache = None
580 self._revisioncache = None
581 self._chainbasecache.clear()
581 self._chainbasecache.clear()
582 self._chunkcache = (0, '')
582 self._chunkcache = (0, '')
583 self._pcache = {}
583 self._pcache = {}
584
584
585 try:
585 try:
586 # If we are using the native C version, we are in a fun case
587 # where self.index, self.nodemap and self._nodecache are the same
588 # object.
589 self._nodecache.clearcaches()
589 self._nodecache.clearcaches()
590 except AttributeError:
590 except AttributeError:
591 self._nodecache = {nullid: nullrev}
591 self._nodecache = {nullid: nullrev}
592 self._nodepos = None
592 self._nodepos = None
593
593
594 def rev(self, node):
594 def rev(self, node):
595 try:
595 try:
596 return self._nodecache[node]
596 return self._nodecache[node]
597 except TypeError:
597 except TypeError:
598 raise
598 raise
599 except error.RevlogError:
599 except error.RevlogError:
600 # parsers.c radix tree lookup failed
600 # parsers.c radix tree lookup failed
601 if node == wdirid or node in wdirfilenodeids:
601 if node == wdirid or node in wdirfilenodeids:
602 raise error.WdirUnsupported
602 raise error.WdirUnsupported
603 raise error.LookupError(node, self.indexfile, _('no node'))
603 raise error.LookupError(node, self.indexfile, _('no node'))
604 except KeyError:
604 except KeyError:
605 # pure python cache lookup failed
605 # pure python cache lookup failed
606 n = self._nodecache
606 n = self._nodecache
607 i = self.index
607 i = self.index
608 p = self._nodepos
608 p = self._nodepos
609 if p is None:
609 if p is None:
610 p = len(i) - 1
610 p = len(i) - 1
611 else:
611 else:
612 assert p < len(i)
612 assert p < len(i)
613 for r in pycompat.xrange(p, -1, -1):
613 for r in pycompat.xrange(p, -1, -1):
614 v = i[r][7]
614 v = i[r][7]
615 n[v] = r
615 n[v] = r
616 if v == node:
616 if v == node:
617 self._nodepos = r - 1
617 self._nodepos = r - 1
618 return r
618 return r
619 if node == wdirid or node in wdirfilenodeids:
619 if node == wdirid or node in wdirfilenodeids:
620 raise error.WdirUnsupported
620 raise error.WdirUnsupported
621 raise error.LookupError(node, self.indexfile, _('no node'))
621 raise error.LookupError(node, self.indexfile, _('no node'))
622
622
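# A minimal sketch of mapping a binary node id to its revision number, with
# the two failure modes raised above; ``node`` is an illustrative name:
#
#   try:
#       r = self.rev(node)
#   except error.WdirUnsupported:
#       pass    # caller asked about the working-directory pseudo-node
#   except error.LookupError:
#       pass    # node is not present in this revlog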
623 # Accessors for index entries.
623 # Accessors for index entries.
624
624
625 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
625 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
626 # are flags.
626 # are flags.
627 def start(self, rev):
627 def start(self, rev):
628 return int(self.index[rev][0] >> 16)
628 return int(self.index[rev][0] >> 16)
629
629
630 def flags(self, rev):
630 def flags(self, rev):
631 return self.index[rev][0] & 0xFFFF
631 return self.index[rev][0] & 0xFFFF
632
632
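# A worked example (illustrative only) of the packing described in the comment
# above and unpacked by start() and flags():
#
#   offset_flags = (offset << 16) | flags
#   assert offset_flags >> 16 == offset       # what start() extracts
#   assert offset_flags & 0xFFFF == flags     # what flags() extracts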
633 def length(self, rev):
633 def length(self, rev):
634 return self.index[rev][1]
634 return self.index[rev][1]
635
635
636 def rawsize(self, rev):
636 def rawsize(self, rev):
637 """return the length of the uncompressed text for a given revision"""
637 """return the length of the uncompressed text for a given revision"""
638 l = self.index[rev][2]
638 l = self.index[rev][2]
639 if l >= 0:
639 if l >= 0:
640 return l
640 return l
641
641
642 t = self.rawdata(rev)
642 t = self.rawdata(rev)
643 return len(t)
643 return len(t)
644
644
645 def size(self, rev):
645 def size(self, rev):
646 """length of non-raw text (processed by a "read" flag processor)"""
646 """length of non-raw text (processed by a "read" flag processor)"""
647 # fast path: if no "read" flag processor could change the content,
647 # fast path: if no "read" flag processor could change the content,
648 # size is rawsize. note: ELLIPSIS is known to not change the content.
648 # size is rawsize. note: ELLIPSIS is known to not change the content.
649 flags = self.flags(rev)
649 flags = self.flags(rev)
650 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
650 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
651 return self.rawsize(rev)
651 return self.rawsize(rev)
652
652
653 return len(self.revision(rev, raw=False))
653 return len(self.revision(rev, raw=False))
654
654
655 def chainbase(self, rev):
655 def chainbase(self, rev):
656 base = self._chainbasecache.get(rev)
656 base = self._chainbasecache.get(rev)
657 if base is not None:
657 if base is not None:
658 return base
658 return base
659
659
660 index = self.index
660 index = self.index
661 iterrev = rev
661 iterrev = rev
662 base = index[iterrev][3]
662 base = index[iterrev][3]
663 while base != iterrev:
663 while base != iterrev:
664 iterrev = base
664 iterrev = base
665 base = index[iterrev][3]
665 base = index[iterrev][3]
666
666
667 self._chainbasecache[rev] = base
667 self._chainbasecache[rev] = base
668 return base
668 return base
669
669
670 def linkrev(self, rev):
670 def linkrev(self, rev):
671 return self.index[rev][4]
671 return self.index[rev][4]
672
672
673 def parentrevs(self, rev):
673 def parentrevs(self, rev):
674 try:
674 try:
675 entry = self.index[rev]
675 entry = self.index[rev]
676 except IndexError:
676 except IndexError:
677 if rev == wdirrev:
677 if rev == wdirrev:
678 raise error.WdirUnsupported
678 raise error.WdirUnsupported
679 raise
679 raise
680
680
681 return entry[5], entry[6]
681 return entry[5], entry[6]
682
682
683 # fast parentrevs(rev) where rev isn't filtered
683 # fast parentrevs(rev) where rev isn't filtered
684 _uncheckedparentrevs = parentrevs
684 _uncheckedparentrevs = parentrevs
685
685
686 def node(self, rev):
686 def node(self, rev):
687 try:
687 try:
688 return self.index[rev][7]
688 return self.index[rev][7]
689 except IndexError:
689 except IndexError:
690 if rev == wdirrev:
690 if rev == wdirrev:
691 raise error.WdirUnsupported
691 raise error.WdirUnsupported
692 raise
692 raise
693
693
694 # Derived from index values.
694 # Derived from index values.
695
695
696 def end(self, rev):
696 def end(self, rev):
697 return self.start(rev) + self.length(rev)
697 return self.start(rev) + self.length(rev)
698
698
699 def parents(self, node):
699 def parents(self, node):
700 i = self.index
700 i = self.index
701 d = i[self.rev(node)]
701 d = i[self.rev(node)]
702 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
702 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
703
703
704 def chainlen(self, rev):
704 def chainlen(self, rev):
705 return self._chaininfo(rev)[0]
705 return self._chaininfo(rev)[0]
706
706
707 def _chaininfo(self, rev):
707 def _chaininfo(self, rev):
708 chaininfocache = self._chaininfocache
708 chaininfocache = self._chaininfocache
709 if rev in chaininfocache:
709 if rev in chaininfocache:
710 return chaininfocache[rev]
710 return chaininfocache[rev]
711 index = self.index
711 index = self.index
712 generaldelta = self._generaldelta
712 generaldelta = self._generaldelta
713 iterrev = rev
713 iterrev = rev
714 e = index[iterrev]
714 e = index[iterrev]
715 clen = 0
715 clen = 0
716 compresseddeltalen = 0
716 compresseddeltalen = 0
717 while iterrev != e[3]:
717 while iterrev != e[3]:
718 clen += 1
718 clen += 1
719 compresseddeltalen += e[1]
719 compresseddeltalen += e[1]
720 if generaldelta:
720 if generaldelta:
721 iterrev = e[3]
721 iterrev = e[3]
722 else:
722 else:
723 iterrev -= 1
723 iterrev -= 1
724 if iterrev in chaininfocache:
724 if iterrev in chaininfocache:
725 t = chaininfocache[iterrev]
725 t = chaininfocache[iterrev]
726 clen += t[0]
726 clen += t[0]
727 compresseddeltalen += t[1]
727 compresseddeltalen += t[1]
728 break
728 break
729 e = index[iterrev]
729 e = index[iterrev]
730 else:
730 else:
731 # Add text length of base since decompressing that also takes
731 # Add text length of base since decompressing that also takes
732 # work. For cache hits the length is already included.
732 # work. For cache hits the length is already included.
733 compresseddeltalen += e[1]
733 compresseddeltalen += e[1]
734 r = (clen, compresseddeltalen)
734 r = (clen, compresseddeltalen)
735 chaininfocache[rev] = r
735 chaininfocache[rev] = r
736 return r
736 return r
737
737
738 def _deltachain(self, rev, stoprev=None):
738 def _deltachain(self, rev, stoprev=None):
739 """Obtain the delta chain for a revision.
739 """Obtain the delta chain for a revision.
740
740
741 ``stoprev`` specifies a revision to stop at. If not specified, we
741 ``stoprev`` specifies a revision to stop at. If not specified, we
742 stop at the base of the chain.
742 stop at the base of the chain.
743
743
744 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
744 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
745 revs in ascending order and ``stopped`` is a bool indicating whether
745 revs in ascending order and ``stopped`` is a bool indicating whether
746 ``stoprev`` was hit.
746 ``stoprev`` was hit.
747 """
747 """
748 # Try C implementation.
748 # Try C implementation.
749 try:
749 try:
750 return self.index.deltachain(rev, stoprev, self._generaldelta)
750 return self.index.deltachain(rev, stoprev, self._generaldelta)
751 except AttributeError:
751 except AttributeError:
752 pass
752 pass
753
753
754 chain = []
754 chain = []
755
755
756 # Alias to prevent attribute lookup in tight loop.
756 # Alias to prevent attribute lookup in tight loop.
757 index = self.index
757 index = self.index
758 generaldelta = self._generaldelta
758 generaldelta = self._generaldelta
759
759
760 iterrev = rev
760 iterrev = rev
761 e = index[iterrev]
761 e = index[iterrev]
762 while iterrev != e[3] and iterrev != stoprev:
762 while iterrev != e[3] and iterrev != stoprev:
763 chain.append(iterrev)
763 chain.append(iterrev)
764 if generaldelta:
764 if generaldelta:
765 iterrev = e[3]
765 iterrev = e[3]
766 else:
766 else:
767 iterrev -= 1
767 iterrev -= 1
768 e = index[iterrev]
768 e = index[iterrev]
769
769
770 if iterrev == stoprev:
770 if iterrev == stoprev:
771 stopped = True
771 stopped = True
772 else:
772 else:
773 chain.append(iterrev)
773 chain.append(iterrev)
774 stopped = False
774 stopped = False
775
775
776 chain.reverse()
776 chain.reverse()
777 return chain, stopped
777 return chain, stopped
778
778
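# A hedged sketch of walking a delta chain; ``rev`` is an example value. As
# documented above, the chain comes back in ascending order, with chain[0]
# being the base unless ``stopped`` is True:
#
#   chain, stopped = self._deltachain(rev)
#   for r in chain:
#       piece = self._chunk(r)    # decompressed chunk for each link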
779 def ancestors(self, revs, stoprev=0, inclusive=False):
779 def ancestors(self, revs, stoprev=0, inclusive=False):
780 """Generate the ancestors of 'revs' in reverse revision order.
780 """Generate the ancestors of 'revs' in reverse revision order.
781 Does not generate revs lower than stoprev.
781 Does not generate revs lower than stoprev.
782
782
783 See the documentation for ancestor.lazyancestors for more details."""
783 See the documentation for ancestor.lazyancestors for more details."""
784
784
785 # first, make sure start revisions aren't filtered
785 # first, make sure start revisions aren't filtered
786 revs = list(revs)
786 revs = list(revs)
787 checkrev = self.node
787 checkrev = self.node
788 for r in revs:
788 for r in revs:
789 checkrev(r)
789 checkrev(r)
790 # and we're sure ancestors aren't filtered as well
790 # and we're sure ancestors aren't filtered as well
791
791
792 if rustancestor is not None:
792 if rustancestor is not None:
793 lazyancestors = rustancestor.LazyAncestors
793 lazyancestors = rustancestor.LazyAncestors
794 arg = self.index
794 arg = self.index
795 elif util.safehasattr(parsers, 'rustlazyancestors'):
795 elif util.safehasattr(parsers, 'rustlazyancestors'):
796 lazyancestors = ancestor.rustlazyancestors
796 lazyancestors = ancestor.rustlazyancestors
797 arg = self.index
797 arg = self.index
798 else:
798 else:
799 lazyancestors = ancestor.lazyancestors
799 lazyancestors = ancestor.lazyancestors
800 arg = self._uncheckedparentrevs
800 arg = self._uncheckedparentrevs
801 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
801 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
802
802
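# A small sketch of iterating ancestors of a set of revisions in reverse
# revision order; ``tiprev`` and the stoprev value are examples only:
#
#   for a in self.ancestors([tiprev], stoprev=0, inclusive=False):
#       ...   # ``a`` is a revision number; the seed revs are excluded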
803 def descendants(self, revs):
803 def descendants(self, revs):
804 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
804 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
805
805
806 def findcommonmissing(self, common=None, heads=None):
806 def findcommonmissing(self, common=None, heads=None):
807 """Return a tuple of the ancestors of common and the ancestors of heads
807 """Return a tuple of the ancestors of common and the ancestors of heads
808 that are not ancestors of common. In revset terminology, we return the
808 that are not ancestors of common. In revset terminology, we return the
809 tuple:
809 tuple:
810
810
811 ::common, (::heads) - (::common)
811 ::common, (::heads) - (::common)
812
812
813 The list is sorted by revision number, meaning it is
813 The list is sorted by revision number, meaning it is
814 topologically sorted.
814 topologically sorted.
815
815
816 'heads' and 'common' are both lists of node IDs. If heads is
816 'heads' and 'common' are both lists of node IDs. If heads is
817 not supplied, uses all of the revlog's heads. If common is not
817 not supplied, uses all of the revlog's heads. If common is not
818 supplied, uses nullid."""
818 supplied, uses nullid."""
819 if common is None:
819 if common is None:
820 common = [nullid]
820 common = [nullid]
821 if heads is None:
821 if heads is None:
822 heads = self.heads()
822 heads = self.heads()
823
823
824 common = [self.rev(n) for n in common]
824 common = [self.rev(n) for n in common]
825 heads = [self.rev(n) for n in heads]
825 heads = [self.rev(n) for n in heads]
826
826
827 # we want the ancestors, but inclusive
827 # we want the ancestors, but inclusive
828 class lazyset(object):
828 class lazyset(object):
829 def __init__(self, lazyvalues):
829 def __init__(self, lazyvalues):
830 self.addedvalues = set()
830 self.addedvalues = set()
831 self.lazyvalues = lazyvalues
831 self.lazyvalues = lazyvalues
832
832
833 def __contains__(self, value):
833 def __contains__(self, value):
834 return value in self.addedvalues or value in self.lazyvalues
834 return value in self.addedvalues or value in self.lazyvalues
835
835
836 def __iter__(self):
836 def __iter__(self):
837 added = self.addedvalues
837 added = self.addedvalues
838 for r in added:
838 for r in added:
839 yield r
839 yield r
840 for r in self.lazyvalues:
840 for r in self.lazyvalues:
841 if not r in added:
841 if not r in added:
842 yield r
842 yield r
843
843
844 def add(self, value):
844 def add(self, value):
845 self.addedvalues.add(value)
845 self.addedvalues.add(value)
846
846
847 def update(self, values):
847 def update(self, values):
848 self.addedvalues.update(values)
848 self.addedvalues.update(values)
849
849
850 has = lazyset(self.ancestors(common))
850 has = lazyset(self.ancestors(common))
851 has.add(nullrev)
851 has.add(nullrev)
852 has.update(common)
852 has.update(common)
853
853
854 # take all ancestors from heads that aren't in has
854 # take all ancestors from heads that aren't in has
855 missing = set()
855 missing = set()
856 visit = collections.deque(r for r in heads if r not in has)
856 visit = collections.deque(r for r in heads if r not in has)
857 while visit:
857 while visit:
858 r = visit.popleft()
858 r = visit.popleft()
859 if r in missing:
859 if r in missing:
860 continue
860 continue
861 else:
861 else:
862 missing.add(r)
862 missing.add(r)
863 for p in self.parentrevs(r):
863 for p in self.parentrevs(r):
864 if p not in has:
864 if p not in has:
865 visit.append(p)
865 visit.append(p)
866 missing = list(missing)
866 missing = list(missing)
867 missing.sort()
867 missing.sort()
868 return has, [self.node(miss) for miss in missing]
868 return has, [self.node(miss) for miss in missing]
869
869
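# A hedged sketch of splitting history into the part reachable from ``common``
# and the part only reachable from ``heads`` (both example lists of node ids,
# as described in the docstring):
#
#   has, missing = self.findcommonmissing(common=common, heads=heads)
#   # ``missing`` is a topologically sorted list of nodes not in ::common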
870 def incrementalmissingrevs(self, common=None):
870 def incrementalmissingrevs(self, common=None):
871 """Return an object that can be used to incrementally compute the
871 """Return an object that can be used to incrementally compute the
872 revision numbers of the ancestors of arbitrary sets that are not
872 revision numbers of the ancestors of arbitrary sets that are not
873 ancestors of common. This is an ancestor.incrementalmissingancestors
873 ancestors of common. This is an ancestor.incrementalmissingancestors
874 object.
874 object.
875
875
876 'common' is a list of revision numbers. If common is not supplied, uses
876 'common' is a list of revision numbers. If common is not supplied, uses
877 nullrev.
877 nullrev.
878 """
878 """
879 if common is None:
879 if common is None:
880 common = [nullrev]
880 common = [nullrev]
881
881
882 if rustancestor is not None:
882 if rustancestor is not None:
883 return rustancestor.MissingAncestors(self.index, common)
883 return rustancestor.MissingAncestors(self.index, common)
884 return ancestor.incrementalmissingancestors(self.parentrevs, common)
884 return ancestor.incrementalmissingancestors(self.parentrevs, common)
885
885
886 def findmissingrevs(self, common=None, heads=None):
886 def findmissingrevs(self, common=None, heads=None):
887 """Return the revision numbers of the ancestors of heads that
887 """Return the revision numbers of the ancestors of heads that
888 are not ancestors of common.
888 are not ancestors of common.
889
889
890 More specifically, return a list of revision numbers corresponding to
890 More specifically, return a list of revision numbers corresponding to
891 nodes N such that every N satisfies the following constraints:
891 nodes N such that every N satisfies the following constraints:
892
892
893 1. N is an ancestor of some node in 'heads'
893 1. N is an ancestor of some node in 'heads'
894 2. N is not an ancestor of any node in 'common'
894 2. N is not an ancestor of any node in 'common'
895
895
896 The list is sorted by revision number, meaning it is
896 The list is sorted by revision number, meaning it is
897 topologically sorted.
897 topologically sorted.
898
898
899 'heads' and 'common' are both lists of revision numbers. If heads is
899 'heads' and 'common' are both lists of revision numbers. If heads is
900 not supplied, uses all of the revlog's heads. If common is not
900 not supplied, uses all of the revlog's heads. If common is not
901 supplied, uses nullid."""
901 supplied, uses nullid."""
902 if common is None:
902 if common is None:
903 common = [nullrev]
903 common = [nullrev]
904 if heads is None:
904 if heads is None:
905 heads = self.headrevs()
905 heads = self.headrevs()
906
906
907 inc = self.incrementalmissingrevs(common=common)
907 inc = self.incrementalmissingrevs(common=common)
908 return inc.missingancestors(heads)
908 return inc.missingancestors(heads)
909
909
910 def findmissing(self, common=None, heads=None):
910 def findmissing(self, common=None, heads=None):
911 """Return the ancestors of heads that are not ancestors of common.
911 """Return the ancestors of heads that are not ancestors of common.
912
912
913 More specifically, return a list of nodes N such that every N
913 More specifically, return a list of nodes N such that every N
914 satisfies the following constraints:
914 satisfies the following constraints:
915
915
916 1. N is an ancestor of some node in 'heads'
916 1. N is an ancestor of some node in 'heads'
917 2. N is not an ancestor of any node in 'common'
917 2. N is not an ancestor of any node in 'common'
918
918
919 The list is sorted by revision number, meaning it is
919 The list is sorted by revision number, meaning it is
920 topologically sorted.
920 topologically sorted.
921
921
922 'heads' and 'common' are both lists of node IDs. If heads is
922 'heads' and 'common' are both lists of node IDs. If heads is
923 not supplied, uses all of the revlog's heads. If common is not
923 not supplied, uses all of the revlog's heads. If common is not
924 supplied, uses nullid."""
924 supplied, uses nullid."""
925 if common is None:
925 if common is None:
926 common = [nullid]
926 common = [nullid]
927 if heads is None:
927 if heads is None:
928 heads = self.heads()
928 heads = self.heads()
929
929
930 common = [self.rev(n) for n in common]
930 common = [self.rev(n) for n in common]
931 heads = [self.rev(n) for n in heads]
931 heads = [self.rev(n) for n in heads]
932
932
933 inc = self.incrementalmissingrevs(common=common)
933 inc = self.incrementalmissingrevs(common=common)
934 return [self.node(r) for r in inc.missingancestors(heads)]
934 return [self.node(r) for r in inc.missingancestors(heads)]
935
935
936 def nodesbetween(self, roots=None, heads=None):
936 def nodesbetween(self, roots=None, heads=None):
937 """Return a topological path from 'roots' to 'heads'.
937 """Return a topological path from 'roots' to 'heads'.
938
938
939 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
939 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
940 topologically sorted list of all nodes N that satisfy both of
940 topologically sorted list of all nodes N that satisfy both of
941 these constraints:
941 these constraints:
942
942
943 1. N is a descendant of some node in 'roots'
943 1. N is a descendant of some node in 'roots'
944 2. N is an ancestor of some node in 'heads'
944 2. N is an ancestor of some node in 'heads'
945
945
946 Every node is considered to be both a descendant and an ancestor
946 Every node is considered to be both a descendant and an ancestor
947 of itself, so every reachable node in 'roots' and 'heads' will be
947 of itself, so every reachable node in 'roots' and 'heads' will be
948 included in 'nodes'.
948 included in 'nodes'.
949
949
950 'outroots' is the list of reachable nodes in 'roots', i.e., the
950 'outroots' is the list of reachable nodes in 'roots', i.e., the
951 subset of 'roots' that is returned in 'nodes'. Likewise,
951 subset of 'roots' that is returned in 'nodes'. Likewise,
952 'outheads' is the subset of 'heads' that is also in 'nodes'.
952 'outheads' is the subset of 'heads' that is also in 'nodes'.
953
953
954 'roots' and 'heads' are both lists of node IDs. If 'roots' is
954 'roots' and 'heads' are both lists of node IDs. If 'roots' is
955 unspecified, uses nullid as the only root. If 'heads' is
955 unspecified, uses nullid as the only root. If 'heads' is
956 unspecified, uses list of all of the revlog's heads."""
956 unspecified, uses list of all of the revlog's heads."""
957 nonodes = ([], [], [])
957 nonodes = ([], [], [])
958 if roots is not None:
958 if roots is not None:
959 roots = list(roots)
959 roots = list(roots)
960 if not roots:
960 if not roots:
961 return nonodes
961 return nonodes
962 lowestrev = min([self.rev(n) for n in roots])
962 lowestrev = min([self.rev(n) for n in roots])
963 else:
963 else:
964 roots = [nullid] # Everybody's a descendant of nullid
964 roots = [nullid] # Everybody's a descendant of nullid
965 lowestrev = nullrev
965 lowestrev = nullrev
966 if (lowestrev == nullrev) and (heads is None):
966 if (lowestrev == nullrev) and (heads is None):
967 # We want _all_ the nodes!
967 # We want _all_ the nodes!
968 return ([self.node(r) for r in self], [nullid], list(self.heads()))
968 return ([self.node(r) for r in self], [nullid], list(self.heads()))
969 if heads is None:
969 if heads is None:
970 # All nodes are ancestors, so the latest ancestor is the last
970 # All nodes are ancestors, so the latest ancestor is the last
971 # node.
971 # node.
972 highestrev = len(self) - 1
972 highestrev = len(self) - 1
973 # Set ancestors to None to signal that every node is an ancestor.
973 # Set ancestors to None to signal that every node is an ancestor.
974 ancestors = None
974 ancestors = None
975 # Set heads to an empty dictionary for later discovery of heads
975 # Set heads to an empty dictionary for later discovery of heads
976 heads = {}
976 heads = {}
977 else:
977 else:
978 heads = list(heads)
978 heads = list(heads)
979 if not heads:
979 if not heads:
980 return nonodes
980 return nonodes
981 ancestors = set()
981 ancestors = set()
982 # Turn heads into a dictionary so we can remove 'fake' heads.
982 # Turn heads into a dictionary so we can remove 'fake' heads.
983 # Also, later we will be using it to filter out the heads we can't
983 # Also, later we will be using it to filter out the heads we can't
984 # find from roots.
984 # find from roots.
985 heads = dict.fromkeys(heads, False)
985 heads = dict.fromkeys(heads, False)
986 # Start at the top and keep marking parents until we're done.
986 # Start at the top and keep marking parents until we're done.
987 nodestotag = set(heads)
987 nodestotag = set(heads)
988 # Remember where the top was so we can use it as a limit later.
988 # Remember where the top was so we can use it as a limit later.
989 highestrev = max([self.rev(n) for n in nodestotag])
989 highestrev = max([self.rev(n) for n in nodestotag])
990 while nodestotag:
990 while nodestotag:
991 # grab a node to tag
991 # grab a node to tag
992 n = nodestotag.pop()
992 n = nodestotag.pop()
993 # Never tag nullid
993 # Never tag nullid
994 if n == nullid:
994 if n == nullid:
995 continue
995 continue
996 # A node's revision number represents its place in a
996 # A node's revision number represents its place in a
997 # topologically sorted list of nodes.
997 # topologically sorted list of nodes.
998 r = self.rev(n)
998 r = self.rev(n)
999 if r >= lowestrev:
999 if r >= lowestrev:
1000 if n not in ancestors:
1000 if n not in ancestors:
1001 # If we are possibly a descendant of one of the roots
1001 # If we are possibly a descendant of one of the roots
1002 # and we haven't already been marked as an ancestor
1002 # and we haven't already been marked as an ancestor
1003 ancestors.add(n) # Mark as ancestor
1003 ancestors.add(n) # Mark as ancestor
1004 # Add non-nullid parents to list of nodes to tag.
1004 # Add non-nullid parents to list of nodes to tag.
1005 nodestotag.update([p for p in self.parents(n) if
1005 nodestotag.update([p for p in self.parents(n) if
1006 p != nullid])
1006 p != nullid])
1007 elif n in heads: # We've seen it before, is it a fake head?
1007 elif n in heads: # We've seen it before, is it a fake head?
1008 # So it is, real heads should not be the ancestors of
1008 # So it is, real heads should not be the ancestors of
1009 # any other heads.
1009 # any other heads.
1010 heads.pop(n)
1010 heads.pop(n)
1011 if not ancestors:
1011 if not ancestors:
1012 return nonodes
1012 return nonodes
1013 # Now that we have our set of ancestors, we want to remove any
1013 # Now that we have our set of ancestors, we want to remove any
1014 # roots that are not ancestors.
1014 # roots that are not ancestors.
1015
1015
1016 # If one of the roots was nullid, everything is included anyway.
1016 # If one of the roots was nullid, everything is included anyway.
1017 if lowestrev > nullrev:
1017 if lowestrev > nullrev:
1018 # But, since we weren't, let's recompute the lowest rev to not
1018 # But, since we weren't, let's recompute the lowest rev to not
1019 # include roots that aren't ancestors.
1019 # include roots that aren't ancestors.
1020
1020
1021 # Filter out roots that aren't ancestors of heads
1021 # Filter out roots that aren't ancestors of heads
1022 roots = [root for root in roots if root in ancestors]
1022 roots = [root for root in roots if root in ancestors]
1023 # Recompute the lowest revision
1023 # Recompute the lowest revision
1024 if roots:
1024 if roots:
1025 lowestrev = min([self.rev(root) for root in roots])
1025 lowestrev = min([self.rev(root) for root in roots])
1026 else:
1026 else:
1027 # No more roots? Return empty list
1027 # No more roots? Return empty list
1028 return nonodes
1028 return nonodes
1029 else:
1029 else:
1030 # We are descending from nullid, and don't need to care about
1030 # We are descending from nullid, and don't need to care about
1031 # any other roots.
1031 # any other roots.
1032 lowestrev = nullrev
1032 lowestrev = nullrev
1033 roots = [nullid]
1033 roots = [nullid]
1034 # Transform our roots list into a set.
1034 # Transform our roots list into a set.
1035 descendants = set(roots)
1035 descendants = set(roots)
1036 # Also, keep the original roots so we can filter out roots that aren't
1036 # Also, keep the original roots so we can filter out roots that aren't
1037 # 'real' roots (i.e. are descended from other roots).
1037 # 'real' roots (i.e. are descended from other roots).
1038 roots = descendants.copy()
1038 roots = descendants.copy()
1039 # Our topologically sorted list of output nodes.
1039 # Our topologically sorted list of output nodes.
1040 orderedout = []
1040 orderedout = []
1041 # Don't start at nullid since we don't want nullid in our output list,
1041 # Don't start at nullid since we don't want nullid in our output list,
1042 # and if nullid shows up in descendants, empty parents will look like
1042 # and if nullid shows up in descendants, empty parents will look like
1043 # they're descendants.
1043 # they're descendants.
1044 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1044 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1045 n = self.node(r)
1045 n = self.node(r)
1046 isdescendant = False
1046 isdescendant = False
1047 if lowestrev == nullrev: # Everybody is a descendant of nullid
1047 if lowestrev == nullrev: # Everybody is a descendant of nullid
1048 isdescendant = True
1048 isdescendant = True
1049 elif n in descendants:
1049 elif n in descendants:
1050 # n is already a descendant
1050 # n is already a descendant
1051 isdescendant = True
1051 isdescendant = True
1052 # This check only needs to be done here because all the roots
1053 # will start being marked as descendants before the loop.
1054 if n in roots:
1054 if n in roots:
1055 # If n was a root, check if it's a 'real' root.
1055 # If n was a root, check if it's a 'real' root.
1056 p = tuple(self.parents(n))
1056 p = tuple(self.parents(n))
1057 # If any of its parents are descendants, it's not a root.
1057 # If any of its parents are descendants, it's not a root.
1058 if (p[0] in descendants) or (p[1] in descendants):
1058 if (p[0] in descendants) or (p[1] in descendants):
1059 roots.remove(n)
1059 roots.remove(n)
1060 else:
1060 else:
1061 p = tuple(self.parents(n))
1061 p = tuple(self.parents(n))
1062 # A node is a descendant if either of its parents are
1062 # A node is a descendant if either of its parents are
1063 # descendants. (We seeded the dependents list with the roots
1063 # descendants. (We seeded the dependents list with the roots
1064 # up there, remember?)
1064 # up there, remember?)
1065 if (p[0] in descendants) or (p[1] in descendants):
1065 if (p[0] in descendants) or (p[1] in descendants):
1066 descendants.add(n)
1066 descendants.add(n)
1067 isdescendant = True
1067 isdescendant = True
1068 if isdescendant and ((ancestors is None) or (n in ancestors)):
1068 if isdescendant and ((ancestors is None) or (n in ancestors)):
1069 # Only include nodes that are both descendants and ancestors.
1069 # Only include nodes that are both descendants and ancestors.
1070 orderedout.append(n)
1070 orderedout.append(n)
1071 if (ancestors is not None) and (n in heads):
1071 if (ancestors is not None) and (n in heads):
1072 # We're trying to figure out which heads are reachable
1072 # We're trying to figure out which heads are reachable
1073 # from roots.
1073 # from roots.
1074 # Mark this head as having been reached
1074 # Mark this head as having been reached
1075 heads[n] = True
1075 heads[n] = True
1076 elif ancestors is None:
1076 elif ancestors is None:
1077 # Otherwise, we're trying to discover the heads.
1077 # Otherwise, we're trying to discover the heads.
1078 # Assume this is a head because if it isn't, the next step
1078 # Assume this is a head because if it isn't, the next step
1079 # will eventually remove it.
1079 # will eventually remove it.
1080 heads[n] = True
1080 heads[n] = True
1081 # But, obviously its parents aren't.
1081 # But, obviously its parents aren't.
1082 for p in self.parents(n):
1082 for p in self.parents(n):
1083 heads.pop(p, None)
1083 heads.pop(p, None)
1084 heads = [head for head, flag in heads.iteritems() if flag]
1084 heads = [head for head, flag in heads.iteritems() if flag]
1085 roots = list(roots)
1085 roots = list(roots)
1086 assert orderedout
1086 assert orderedout
1087 assert roots
1087 assert roots
1088 assert heads
1088 assert heads
1089 return (orderedout, roots, heads)
1089 return (orderedout, roots, heads)
1090
1090
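# A small sketch of computing the topological path between two sets of nodes;
# ``roots`` and ``heads`` are example lists of node ids:
#
#   nodes, outroots, outheads = self.nodesbetween(roots, heads)
#   # every node in ``nodes`` descends from ``outroots`` and is an
#   # ancestor of ``outheads``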
1091 def headrevs(self, revs=None):
1091 def headrevs(self, revs=None):
1092 if revs is None:
1092 if revs is None:
1093 try:
1093 try:
1094 return self.index.headrevs()
1094 return self.index.headrevs()
1095 except AttributeError:
1095 except AttributeError:
1096 return self._headrevs()
1096 return self._headrevs()
1097 if rustdagop is not None:
1097 if rustdagop is not None:
1098 return rustdagop.headrevs(self.index, revs)
1098 return rustdagop.headrevs(self.index, revs)
1099 return dagop.headrevs(revs, self._uncheckedparentrevs)
1099 return dagop.headrevs(revs, self._uncheckedparentrevs)
1100
1100
1101 def computephases(self, roots):
1101 def computephases(self, roots):
1102 return self.index.computephasesmapsets(roots)
1102 return self.index.computephasesmapsets(roots)
1103
1103
1104 def _headrevs(self):
1104 def _headrevs(self):
1105 count = len(self)
1105 count = len(self)
1106 if not count:
1106 if not count:
1107 return [nullrev]
1107 return [nullrev]
1108 # we won't iter over filtered revs so nobody is a head at start
1109 ishead = [0] * (count + 1)
1110 index = self.index
1111 for r in self:
1112 ishead[r] = 1 # I may be a head
1113 e = index[r]
1114 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1115 return [r for r, val in enumerate(ishead) if val]
1115 return [r for r, val in enumerate(ishead) if val]
1116
1116
1117 def heads(self, start=None, stop=None):
1117 def heads(self, start=None, stop=None):
1118 """return the list of all nodes that have no children
1118 """return the list of all nodes that have no children
1119
1119
1120 if start is specified, only heads that are descendants of
1120 if start is specified, only heads that are descendants of
1121 start will be returned
1121 start will be returned
1122 if stop is specified, it will consider all the revs from stop
1122 if stop is specified, it will consider all the revs from stop
1123 as if they had no children
1123 as if they had no children
1124 """
1124 """
1125 if start is None and stop is None:
1125 if start is None and stop is None:
1126 if not len(self):
1126 if not len(self):
1127 return [nullid]
1127 return [nullid]
1128 return [self.node(r) for r in self.headrevs()]
1128 return [self.node(r) for r in self.headrevs()]
1129
1129
1130 if start is None:
1130 if start is None:
1131 start = nullrev
1131 start = nullrev
1132 else:
1132 else:
1133 start = self.rev(start)
1133 start = self.rev(start)
1134
1134
1135 stoprevs = set(self.rev(n) for n in stop or [])
1135 stoprevs = set(self.rev(n) for n in stop or [])
1136
1136
1137 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1137 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1138 stoprevs=stoprevs)
1138 stoprevs=stoprevs)
1139
1139
1140 return [self.node(rev) for rev in revs]
1140 return [self.node(rev) for rev in revs]
1141
1141
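# Illustrative calling conventions for heads(), per the docstring above;
# ``startnode`` and ``stopnodes`` are example names:
#
#   self.heads()                    # all childless nodes
#   self.heads(start=startnode)     # only heads descending from startnode
#   self.heads(stop=stopnodes)      # treat the nodes in stopnodes as leaves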
1142 def children(self, node):
1142 def children(self, node):
1143 """find the children of a given node"""
1143 """find the children of a given node"""
1144 c = []
1144 c = []
1145 p = self.rev(node)
1145 p = self.rev(node)
1146 for r in self.revs(start=p + 1):
1146 for r in self.revs(start=p + 1):
1147 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1147 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1148 if prevs:
1148 if prevs:
1149 for pr in prevs:
1149 for pr in prevs:
1150 if pr == p:
1150 if pr == p:
1151 c.append(self.node(r))
1151 c.append(self.node(r))
1152 elif p == nullrev:
1152 elif p == nullrev:
1153 c.append(self.node(r))
1153 c.append(self.node(r))
1154 return c
1154 return c
1155
1155
1156 def commonancestorsheads(self, a, b):
1156 def commonancestorsheads(self, a, b):
1157 """calculate all the heads of the common ancestors of nodes a and b"""
1157 """calculate all the heads of the common ancestors of nodes a and b"""
1158 a, b = self.rev(a), self.rev(b)
1158 a, b = self.rev(a), self.rev(b)
1159 ancs = self._commonancestorsheads(a, b)
1159 ancs = self._commonancestorsheads(a, b)
1160 return pycompat.maplist(self.node, ancs)
1160 return pycompat.maplist(self.node, ancs)
1161
1161
1162 def _commonancestorsheads(self, *revs):
1162 def _commonancestorsheads(self, *revs):
1163 """calculate all the heads of the common ancestors of revs"""
1163 """calculate all the heads of the common ancestors of revs"""
1164 try:
1164 try:
1165 ancs = self.index.commonancestorsheads(*revs)
1165 ancs = self.index.commonancestorsheads(*revs)
1166 except (AttributeError, OverflowError): # C implementation failed
1166 except (AttributeError, OverflowError): # C implementation failed
1167 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1167 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1168 return ancs
1168 return ancs
1169
1169
1170 def isancestor(self, a, b):
1170 def isancestor(self, a, b):
1171 """return True if node a is an ancestor of node b
1171 """return True if node a is an ancestor of node b
1172
1172
1173 A revision is considered an ancestor of itself."""
1173 A revision is considered an ancestor of itself."""
1174 a, b = self.rev(a), self.rev(b)
1174 a, b = self.rev(a), self.rev(b)
1175 return self.isancestorrev(a, b)
1175 return self.isancestorrev(a, b)
1176
1176
1177 def isancestorrev(self, a, b):
1177 def isancestorrev(self, a, b):
1178 """return True if revision a is an ancestor of revision b
1178 """return True if revision a is an ancestor of revision b
1179
1179
1180 A revision is considered an ancestor of itself.
1180 A revision is considered an ancestor of itself.
1181
1181
1182 The implementation of this is trivial but the use of
1182 The implementation of this is trivial but the use of
1183 reachableroots is not."""
1183 reachableroots is not."""
1184 if a == nullrev:
1184 if a == nullrev:
1185 return True
1185 return True
1186 elif a == b:
1186 elif a == b:
1187 return True
1187 return True
1188 elif a > b:
1188 elif a > b:
1189 return False
1189 return False
1190 return bool(self.reachableroots(a, [b], [a], includepath=False))
1190 return bool(self.reachableroots(a, [b], [a], includepath=False))
1191
1191
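# A minimal sketch of the node-level and rev-level ancestry checks used
# together; ``a``/``b`` are example nodes and ``ra``/``rb`` example revs:
#
#   if self.isancestor(a, b):          # binary node ids
#       ...
#   if self.isancestorrev(ra, rb):     # revision numbers
#       ...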
1192 def reachableroots(self, minroot, heads, roots, includepath=False):
1192 def reachableroots(self, minroot, heads, roots, includepath=False):
1193 """return (heads(::<roots> and <roots>::<heads>))
1193 """return (heads(::<roots> and <roots>::<heads>))
1194
1194
1195 If includepath is True, return (<roots>::<heads>)."""
1195 If includepath is True, return (<roots>::<heads>)."""
1196 try:
1196 try:
1197 return self.index.reachableroots2(minroot, heads, roots,
1197 return self.index.reachableroots2(minroot, heads, roots,
1198 includepath)
1198 includepath)
1199 except AttributeError:
1199 except AttributeError:
1200 return dagop._reachablerootspure(self.parentrevs,
1200 return dagop._reachablerootspure(self.parentrevs,
1201 minroot, roots, heads, includepath)
1201 minroot, roots, heads, includepath)
1202
1202
1203 def ancestor(self, a, b):
1203 def ancestor(self, a, b):
1204 """calculate the "best" common ancestor of nodes a and b"""
1204 """calculate the "best" common ancestor of nodes a and b"""
1205
1205
1206 a, b = self.rev(a), self.rev(b)
1206 a, b = self.rev(a), self.rev(b)
1207 try:
1207 try:
1208 ancs = self.index.ancestors(a, b)
1208 ancs = self.index.ancestors(a, b)
1209 except (AttributeError, OverflowError):
1209 except (AttributeError, OverflowError):
1210 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 ancs = ancestor.ancestors(self.parentrevs, a, b)
1211 if ancs:
1211 if ancs:
1212 # choose a consistent winner when there's a tie
1212 # choose a consistent winner when there's a tie
1213 return min(map(self.node, ancs))
1213 return min(map(self.node, ancs))
1214 return nullid
1214 return nullid
1215
1215
1216 def _match(self, id):
1216 def _match(self, id):
1217 if isinstance(id, int):
1217 if isinstance(id, int):
1218 # rev
1218 # rev
1219 return self.node(id)
1219 return self.node(id)
1220 if len(id) == 20:
1220 if len(id) == 20:
1221 # possibly a binary node
1221 # possibly a binary node
1222 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 # odds of a binary node being all hex in ASCII are 1 in 10**25
1223 try:
1223 try:
1224 node = id
1224 node = id
1225 self.rev(node) # quick search the index
1225 self.rev(node) # quick search the index
1226 return node
1226 return node
1227 except error.LookupError:
1227 except error.LookupError:
1228 pass # may be partial hex id
1228 pass # may be partial hex id
1229 try:
1229 try:
1230 # str(rev)
1230 # str(rev)
1231 rev = int(id)
1231 rev = int(id)
1232 if "%d" % rev != id:
1232 if "%d" % rev != id:
1233 raise ValueError
1233 raise ValueError
1234 if rev < 0:
1234 if rev < 0:
1235 rev = len(self) + rev
1235 rev = len(self) + rev
1236 if rev < 0 or rev >= len(self):
1236 if rev < 0 or rev >= len(self):
1237 raise ValueError
1237 raise ValueError
1238 return self.node(rev)
1238 return self.node(rev)
1239 except (ValueError, OverflowError):
1239 except (ValueError, OverflowError):
1240 pass
1240 pass
1241 if len(id) == 40:
1241 if len(id) == 40:
1242 try:
1242 try:
1243 # a full hex nodeid?
1243 # a full hex nodeid?
1244 node = bin(id)
1244 node = bin(id)
1245 self.rev(node)
1245 self.rev(node)
1246 return node
1246 return node
1247 except (TypeError, error.LookupError):
1247 except (TypeError, error.LookupError):
1248 pass
1248 pass
1249
1249
1250 def _partialmatch(self, id):
1250 def _partialmatch(self, id):
1251 # we don't care about wdirfilenodeids as they should always be full hashes
1252 maybewdir = wdirhex.startswith(id)
1252 maybewdir = wdirhex.startswith(id)
1253 try:
1253 try:
1254 partial = self.index.partialmatch(id)
1254 partial = self.index.partialmatch(id)
1255 if partial and self.hasnode(partial):
1255 if partial and self.hasnode(partial):
1256 if maybewdir:
1256 if maybewdir:
1257 # single 'ff...' match in radix tree, ambiguous with wdir
1257 # single 'ff...' match in radix tree, ambiguous with wdir
1258 raise error.RevlogError
1258 raise error.RevlogError
1259 return partial
1259 return partial
1260 if maybewdir:
1260 if maybewdir:
1261 # no 'ff...' match in radix tree, wdir identified
1261 # no 'ff...' match in radix tree, wdir identified
1262 raise error.WdirUnsupported
1262 raise error.WdirUnsupported
1263 return None
1263 return None
1264 except error.RevlogError:
1264 except error.RevlogError:
1265 # parsers.c radix tree lookup gave multiple matches
1265 # parsers.c radix tree lookup gave multiple matches
1266 # fast path: for unfiltered changelog, radix tree is accurate
1266 # fast path: for unfiltered changelog, radix tree is accurate
1267 if not getattr(self, 'filteredrevs', None):
1267 if not getattr(self, 'filteredrevs', None):
1268 raise error.AmbiguousPrefixLookupError(
1268 raise error.AmbiguousPrefixLookupError(
1269 id, self.indexfile, _('ambiguous identifier'))
1269 id, self.indexfile, _('ambiguous identifier'))
1270 # fall through to slow path that filters hidden revisions
1270 # fall through to slow path that filters hidden revisions
1271 except (AttributeError, ValueError):
1271 except (AttributeError, ValueError):
1272 # we are pure python, or key was too short to search radix tree
1272 # we are pure python, or key was too short to search radix tree
1273 pass
1273 pass
1274
1274
1275 if id in self._pcache:
1275 if id in self._pcache:
1276 return self._pcache[id]
1276 return self._pcache[id]
1277
1277
1278 if len(id) <= 40:
1278 if len(id) <= 40:
1279 try:
1279 try:
1280 # hex(node)[:...]
1280 # hex(node)[:...]
1281 l = len(id) // 2 # grab an even number of digits
1281 l = len(id) // 2 # grab an even number of digits
1282 prefix = bin(id[:l * 2])
1282 prefix = bin(id[:l * 2])
1283 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1284 nl = [n for n in nl if hex(n).startswith(id) and
1284 nl = [n for n in nl if hex(n).startswith(id) and
1285 self.hasnode(n)]
1285 self.hasnode(n)]
1286 if nullhex.startswith(id):
1286 if nullhex.startswith(id):
1287 nl.append(nullid)
1287 nl.append(nullid)
1288 if len(nl) > 0:
1288 if len(nl) > 0:
1289 if len(nl) == 1 and not maybewdir:
1289 if len(nl) == 1 and not maybewdir:
1290 self._pcache[id] = nl[0]
1290 self._pcache[id] = nl[0]
1291 return nl[0]
1291 return nl[0]
1292 raise error.AmbiguousPrefixLookupError(
1292 raise error.AmbiguousPrefixLookupError(
1293 id, self.indexfile, _('ambiguous identifier'))
1293 id, self.indexfile, _('ambiguous identifier'))
1294 if maybewdir:
1294 if maybewdir:
1295 raise error.WdirUnsupported
1295 raise error.WdirUnsupported
1296 return None
1296 return None
1297 except TypeError:
1297 except TypeError:
1298 pass
1298 pass
1299
1299
1300 def lookup(self, id):
1300 def lookup(self, id):
1301 """locate a node based on:
1301 """locate a node based on:
1302 - revision number or str(revision number)
1302 - revision number or str(revision number)
1303 - nodeid or subset of hex nodeid
1303 - nodeid or subset of hex nodeid
1304 """
1304 """
1305 n = self._match(id)
1305 n = self._match(id)
1306 if n is not None:
1306 if n is not None:
1307 return n
1307 return n
1308 n = self._partialmatch(id)
1308 n = self._partialmatch(id)
1309 if n:
1309 if n:
1310 return n
1310 return n
1311
1311
1312 raise error.LookupError(id, self.indexfile, _('no match found'))
1312 raise error.LookupError(id, self.indexfile, _('no match found'))
1313
1313
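# A hedged sketch of the identifier forms accepted by lookup(), per the
# docstring above; ``node`` and ``hexprefix`` are illustrative values:
#
#   self.lookup(5)             # revision number
#   self.lookup(node)          # 20-byte binary node id
#   self.lookup(hexprefix)     # unambiguous (partial) hex node id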
1314 def shortest(self, node, minlength=1):
1314 def shortest(self, node, minlength=1):
1315 """Find the shortest unambiguous prefix that matches node."""
1315 """Find the shortest unambiguous prefix that matches node."""
1316 def isvalid(prefix):
1316 def isvalid(prefix):
1317 try:
1317 try:
1318 matchednode = self._partialmatch(prefix)
1318 matchednode = self._partialmatch(prefix)
1319 except error.AmbiguousPrefixLookupError:
1319 except error.AmbiguousPrefixLookupError:
1320 return False
1320 return False
1321 except error.WdirUnsupported:
1321 except error.WdirUnsupported:
1322 # single 'ff...' match
1322 # single 'ff...' match
1323 return True
1323 return True
1324 if matchednode is None:
1324 if matchednode is None:
1325 raise error.LookupError(node, self.indexfile, _('no node'))
1325 raise error.LookupError(node, self.indexfile, _('no node'))
1326 return True
1326 return True
1327
1327
1328 def maybewdir(prefix):
1328 def maybewdir(prefix):
1329 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1329 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1330
1330
1331 hexnode = hex(node)
1331 hexnode = hex(node)
1332
1332
1333 def disambiguate(hexnode, minlength):
1333 def disambiguate(hexnode, minlength):
1334 """Disambiguate against wdirid."""
1334 """Disambiguate against wdirid."""
1335 for length in range(minlength, 41):
1335 for length in range(minlength, 41):
1336 prefix = hexnode[:length]
1336 prefix = hexnode[:length]
1337 if not maybewdir(prefix):
1337 if not maybewdir(prefix):
1338 return prefix
1338 return prefix
1339
1339
1340 if not getattr(self, 'filteredrevs', None):
1340 if not getattr(self, 'filteredrevs', None):
1341 try:
1341 try:
1342 length = max(self.index.shortest(node), minlength)
1342 length = max(self.index.shortest(node), minlength)
1343 return disambiguate(hexnode, length)
1343 return disambiguate(hexnode, length)
1344 except error.RevlogError:
1344 except error.RevlogError:
1345 if node != wdirid:
1345 if node != wdirid:
1346 raise error.LookupError(node, self.indexfile, _('no node'))
1346 raise error.LookupError(node, self.indexfile, _('no node'))
1347 except AttributeError:
1347 except AttributeError:
1348 # Fall through to pure code
1348 # Fall through to pure code
1349 pass
1349 pass
1350
1350
1351 if node == wdirid:
1351 if node == wdirid:
1352 for length in range(minlength, 41):
1352 for length in range(minlength, 41):
1353 prefix = hexnode[:length]
1353 prefix = hexnode[:length]
1354 if isvalid(prefix):
1354 if isvalid(prefix):
1355 return prefix
1355 return prefix
1356
1356
1357 for length in range(minlength, 41):
1357 for length in range(minlength, 41):
1358 prefix = hexnode[:length]
1358 prefix = hexnode[:length]
1359 if isvalid(prefix):
1359 if isvalid(prefix):
1360 return disambiguate(hexnode, length)
1360 return disambiguate(hexnode, length)
1361
1361
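# A small sketch of asking for the shortest unambiguous prefix of a node with
# a minimum length; the values are examples only:
#
#   prefix = self.shortest(node, minlength=4)
#   # ``prefix`` is hex, at least 4 characters, and unambiguous in this revlog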
1362 def cmp(self, node, text):
1362 def cmp(self, node, text):
1363 """compare text with a given file revision
1363 """compare text with a given file revision
1364
1364
1365 returns True if text is different from what is stored.
1366 """
1366 """
1367 p1, p2 = self.parents(node)
1367 p1, p2 = self.parents(node)
1368 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368 return storageutil.hashrevisionsha1(text, p1, p2) != node
1369
1369
1370 def _cachesegment(self, offset, data):
1370 def _cachesegment(self, offset, data):
1371 """Add a segment to the revlog cache.
1371 """Add a segment to the revlog cache.
1372
1372
1373 Accepts an absolute offset and the data that is at that location.
1373 Accepts an absolute offset and the data that is at that location.
1374 """
1374 """
1375 o, d = self._chunkcache
1375 o, d = self._chunkcache
1376 # try to add to existing cache
1376 # try to add to existing cache
1377 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1378 self._chunkcache = o, d + data
1378 self._chunkcache = o, d + data
1379 else:
1379 else:
1380 self._chunkcache = offset, data
1380 self._chunkcache = offset, data
1381
1381
1382 def _readsegment(self, offset, length, df=None):
1382 def _readsegment(self, offset, length, df=None):
1383 """Load a segment of raw data from the revlog.
1383 """Load a segment of raw data from the revlog.
1384
1384
1385 Accepts an absolute offset, length to read, and an optional existing
1385 Accepts an absolute offset, length to read, and an optional existing
1386 file handle to read from.
1386 file handle to read from.
1387
1387
1388 If an existing file handle is passed, it will be seeked and the
1388 If an existing file handle is passed, it will be seeked and the
1389 original seek position will NOT be restored.
1389 original seek position will NOT be restored.
1390
1390
1391 Returns a str or buffer of raw byte data.
1391 Returns a str or buffer of raw byte data.
1392
1392
1393 Raises if the requested number of bytes could not be read.
1393 Raises if the requested number of bytes could not be read.
1394 """
1394 """
1395 # Cache data both forward and backward around the requested
1395 # Cache data both forward and backward around the requested
1396 # data, in a fixed size window. This helps speed up operations
1396 # data, in a fixed size window. This helps speed up operations
1397 # involving reading the revlog backwards.
1397 # involving reading the revlog backwards.
1398 cachesize = self._chunkcachesize
1398 cachesize = self._chunkcachesize
1399 realoffset = offset & ~(cachesize - 1)
1399 realoffset = offset & ~(cachesize - 1)
1400 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1401 - realoffset)
1401 - realoffset)
1402 with self._datareadfp(df) as df:
1402 with self._datareadfp(df) as df:
1403 df.seek(realoffset)
1403 df.seek(realoffset)
1404 d = df.read(reallength)
1404 d = df.read(reallength)
1405
1405
1406 self._cachesegment(realoffset, d)
1406 self._cachesegment(realoffset, d)
1407 if offset != realoffset or reallength != length:
1407 if offset != realoffset or reallength != length:
1408 startoffset = offset - realoffset
1408 startoffset = offset - realoffset
1409 if len(d) - startoffset < length:
1409 if len(d) - startoffset < length:
1410 raise error.RevlogError(
1410 raise error.RevlogError(
1411 _('partial read of revlog %s; expected %d bytes from '
1411 _('partial read of revlog %s; expected %d bytes from '
1412 'offset %d, got %d') %
1412 'offset %d, got %d') %
1413 (self.indexfile if self._inline else self.datafile,
1413 (self.indexfile if self._inline else self.datafile,
1414 length, realoffset, len(d) - startoffset))
1414 length, realoffset, len(d) - startoffset))
1415
1415
1416 return util.buffer(d, startoffset, length)
1416 return util.buffer(d, startoffset, length)
1417
1417
1418 if len(d) < length:
1418 if len(d) < length:
1419 raise error.RevlogError(
1419 raise error.RevlogError(
1420 _('partial read of revlog %s; expected %d bytes from offset '
1420 _('partial read of revlog %s; expected %d bytes from offset '
1421 '%d, got %d') %
1421 '%d, got %d') %
1422 (self.indexfile if self._inline else self.datafile,
1422 (self.indexfile if self._inline else self.datafile,
1423 length, offset, len(d)))
1423 length, offset, len(d)))
1424
1424
1425 return d
1425 return d
1426
1426
1427 def _getsegment(self, offset, length, df=None):
1427 def _getsegment(self, offset, length, df=None):
1428 """Obtain a segment of raw data from the revlog.
1428 """Obtain a segment of raw data from the revlog.
1429
1429
1430 Accepts an absolute offset, length of bytes to obtain, and an
1430 Accepts an absolute offset, length of bytes to obtain, and an
1431 optional file handle to the already-opened revlog. If the file
1431 optional file handle to the already-opened revlog. If the file
1432 handle is used, its original seek position will not be preserved.
1433
1433
1434 Requests for data may be returned from a cache.
1434 Requests for data may be returned from a cache.
1435
1435
1436 Returns a str or a buffer instance of raw byte data.
1436 Returns a str or a buffer instance of raw byte data.
1437 """
1437 """
1438 o, d = self._chunkcache
1438 o, d = self._chunkcache
1439 l = len(d)
1439 l = len(d)
1440
1440
1441 # is it in the cache?
1441 # is it in the cache?
1442 cachestart = offset - o
1442 cachestart = offset - o
1443 cacheend = cachestart + length
1443 cacheend = cachestart + length
1444 if cachestart >= 0 and cacheend <= l:
1444 if cachestart >= 0 and cacheend <= l:
1445 if cachestart == 0 and cacheend == l:
1445 if cachestart == 0 and cacheend == l:
1446 return d # avoid a copy
1446 return d # avoid a copy
1447 return util.buffer(d, cachestart, cacheend - cachestart)
1447 return util.buffer(d, cachestart, cacheend - cachestart)
1448
1448
1449 return self._readsegment(offset, length, df=df)
1449 return self._readsegment(offset, length, df=df)
1450
1450
1451 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 def _getsegmentforrevs(self, startrev, endrev, df=None):
1452 """Obtain a segment of raw data corresponding to a range of revisions.
1452 """Obtain a segment of raw data corresponding to a range of revisions.
1453
1453
1454 Accepts the start and end revisions and an optional already-open
1454 Accepts the start and end revisions and an optional already-open
1455 file handle to be used for reading. If the file handle is read, its
1455 file handle to be used for reading. If the file handle is read, its
1456 seek position will not be preserved.
1456 seek position will not be preserved.
1457
1457
1458 Requests for data may be satisfied by a cache.
1458 Requests for data may be satisfied by a cache.
1459
1459
1460 Returns a 2-tuple of (offset, data) for the requested range of
1460 Returns a 2-tuple of (offset, data) for the requested range of
1461 revisions. Offset is the integer offset from the beginning of the
1461 revisions. Offset is the integer offset from the beginning of the
1462 revlog and data is a str or buffer of the raw byte data.
1462 revlog and data is a str or buffer of the raw byte data.
1463
1463
1464 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1465 to determine where each revision's data begins and ends.
1465 to determine where each revision's data begins and ends.
1466 """
1466 """
1467 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1468 # (functions are expensive).
1468 # (functions are expensive).
1469 index = self.index
1469 index = self.index
1470 istart = index[startrev]
1470 istart = index[startrev]
1471 start = int(istart[0] >> 16)
1471 start = int(istart[0] >> 16)
1472 if startrev == endrev:
1472 if startrev == endrev:
1473 end = start + istart[1]
1473 end = start + istart[1]
1474 else:
1474 else:
1475 iend = index[endrev]
1475 iend = index[endrev]
1476 end = int(iend[0] >> 16) + iend[1]
1476 end = int(iend[0] >> 16) + iend[1]
1477
1477
1478 if self._inline:
1478 if self._inline:
1479 start += (startrev + 1) * self._io.size
1479 start += (startrev + 1) * self._io.size
1480 end += (endrev + 1) * self._io.size
1480 end += (endrev + 1) * self._io.size
1481 length = end - start
1481 length = end - start
1482
1482
1483 return start, self._getsegment(start, length, df=df)
1483 return start, self._getsegment(start, length, df=df)
1484
1484
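# Editorial sketch (not part of revlog.py): carving one revision's chunk out
# of a multi-revision segment, using start()/length() as the docstring above
# suggests; 'rl', 'startrev', 'endrev' and 'rev' are hypothetical names.
def _example_chunk_from_segment(rl, startrev, endrev, rev):
    from mercurial import util
    offset, data = rl._getsegmentforrevs(startrev, endrev)
    chunkstart = rl.start(rev)
    if rl._inline:
        # inline revlogs interleave index entries with the data
        chunkstart += (rev + 1) * rl._io.size
    return util.buffer(data, chunkstart - offset, rl.length(rev))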
1485 def _chunk(self, rev, df=None):
1485 def _chunk(self, rev, df=None):
1486 """Obtain a single decompressed chunk for a revision.
1486 """Obtain a single decompressed chunk for a revision.
1487
1487
1488 Accepts an integer revision and an optional already-open file handle
1488 Accepts an integer revision and an optional already-open file handle
1489 to be used for reading. If used, the seek position of the file will not
1489 to be used for reading. If used, the seek position of the file will not
1490 be preserved.
1490 be preserved.
1491
1491
1492 Returns a str holding uncompressed data for the requested revision.
1492 Returns a str holding uncompressed data for the requested revision.
1493 """
1493 """
1494 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1495
1495
1496 def _chunks(self, revs, df=None, targetsize=None):
1496 def _chunks(self, revs, df=None, targetsize=None):
1497 """Obtain decompressed chunks for the specified revisions.
1497 """Obtain decompressed chunks for the specified revisions.
1498
1498
1499 Accepts an iterable of numeric revisions that are assumed to be in
1499 Accepts an iterable of numeric revisions that are assumed to be in
1500 ascending order. Also accepts an optional already-open file handle
1500 ascending order. Also accepts an optional already-open file handle
1501 to be used for reading. If used, the seek position of the file will
1501 to be used for reading. If used, the seek position of the file will
1502 not be preserved.
1502 not be preserved.
1503
1503
1504 This function is similar to calling ``self._chunk()`` multiple times,
1504 This function is similar to calling ``self._chunk()`` multiple times,
1505 but is faster.
1505 but is faster.
1506
1506
1507 Returns a list with decompressed data for each requested revision.
1507 Returns a list with decompressed data for each requested revision.
1508 """
1508 """
1509 if not revs:
1509 if not revs:
1510 return []
1510 return []
1511 start = self.start
1511 start = self.start
1512 length = self.length
1512 length = self.length
1513 inline = self._inline
1513 inline = self._inline
1514 iosize = self._io.size
1514 iosize = self._io.size
1515 buffer = util.buffer
1515 buffer = util.buffer
1516
1516
1517 l = []
1517 l = []
1518 ladd = l.append
1518 ladd = l.append
1519
1519
1520 if not self._withsparseread:
1520 if not self._withsparseread:
1521 slicedchunks = (revs,)
1521 slicedchunks = (revs,)
1522 else:
1522 else:
1523 slicedchunks = deltautil.slicechunk(self, revs,
1523 slicedchunks = deltautil.slicechunk(self, revs,
1524 targetsize=targetsize)
1524 targetsize=targetsize)
1525
1525
1526 for revschunk in slicedchunks:
1526 for revschunk in slicedchunks:
1527 firstrev = revschunk[0]
1527 firstrev = revschunk[0]
1528 # Skip trailing revisions with empty diff
1528 # Skip trailing revisions with empty diff
1529 for lastrev in revschunk[::-1]:
1529 for lastrev in revschunk[::-1]:
1530 if length(lastrev) != 0:
1530 if length(lastrev) != 0:
1531 break
1531 break
1532
1532
1533 try:
1533 try:
1534 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1535 except OverflowError:
1535 except OverflowError:
1536 # issue4215 - we can't cache a run of chunks greater than
1536 # issue4215 - we can't cache a run of chunks greater than
1537 # 2G on Windows
1537 # 2G on Windows
1538 return [self._chunk(rev, df=df) for rev in revschunk]
1538 return [self._chunk(rev, df=df) for rev in revschunk]
1539
1539
1540 decomp = self.decompress
1540 decomp = self.decompress
1541 for rev in revschunk:
1541 for rev in revschunk:
1542 chunkstart = start(rev)
1542 chunkstart = start(rev)
1543 if inline:
1543 if inline:
1544 chunkstart += (rev + 1) * iosize
1544 chunkstart += (rev + 1) * iosize
1545 chunklength = length(rev)
1545 chunklength = length(rev)
1546 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1547
1547
1548 return l
1548 return l
1549
1549
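# Editorial sketch: _chunks() is documented above as equivalent to repeated
# _chunk() calls, only faster; 'rl' and 'revs' are hypothetical names.
def _example_chunks_equivalence(rl, revs):
    batched = [bytes(c) for c in rl._chunks(revs)]
    one_by_one = [bytes(rl._chunk(r)) for r in revs]
    return batched == one_by_one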
1550 def _chunkclear(self):
1550 def _chunkclear(self):
1551 """Clear the raw chunk cache."""
1551 """Clear the raw chunk cache."""
1552 self._chunkcache = (0, '')
1552 self._chunkcache = (0, '')
1553
1553
1554 def deltaparent(self, rev):
1554 def deltaparent(self, rev):
1555 """return deltaparent of the given revision"""
1555 """return deltaparent of the given revision"""
1556 base = self.index[rev][3]
1556 base = self.index[rev][3]
1557 if base == rev:
1557 if base == rev:
1558 return nullrev
1558 return nullrev
1559 elif self._generaldelta:
1559 elif self._generaldelta:
1560 return base
1560 return base
1561 else:
1561 else:
1562 return rev - 1
1562 return rev - 1
1563
1563
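# Editorial sketch: following deltaparent() until it returns nullrev walks a
# delta chain from a revision back to its base, newest first; this mirrors
# what _deltachain() computes internally. 'rl' and 'rev' are hypothetical.
def _example_deltaparents(rl, rev):
    from mercurial.node import nullrev
    chain = [rev]
    while rl.deltaparent(chain[-1]) != nullrev:
        chain.append(rl.deltaparent(chain[-1]))
    return chain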
1564 def issnapshot(self, rev):
1564 def issnapshot(self, rev):
1565 """tells whether rev is a snapshot
1565 """tells whether rev is a snapshot
1566 """
1566 """
1567 if not self._sparserevlog:
1567 if not self._sparserevlog:
1568 return self.deltaparent(rev) == nullrev
1568 return self.deltaparent(rev) == nullrev
1569 elif util.safehasattr(self.index, 'issnapshot'):
1569 elif util.safehasattr(self.index, 'issnapshot'):
1570 # directly assign the method to cache the testing and access
1570 # directly assign the method to cache the testing and access
1571 self.issnapshot = self.index.issnapshot
1571 self.issnapshot = self.index.issnapshot
1572 return self.issnapshot(rev)
1572 return self.issnapshot(rev)
1573 if rev == nullrev:
1573 if rev == nullrev:
1574 return True
1574 return True
1575 entry = self.index[rev]
1575 entry = self.index[rev]
1576 base = entry[3]
1576 base = entry[3]
1577 if base == rev:
1577 if base == rev:
1578 return True
1578 return True
1579 if base == nullrev:
1579 if base == nullrev:
1580 return True
1580 return True
1581 p1 = entry[5]
1581 p1 = entry[5]
1582 p2 = entry[6]
1582 p2 = entry[6]
1583 if base == p1 or base == p2:
1583 if base == p1 or base == p2:
1584 return False
1584 return False
1585 return self.issnapshot(base)
1585 return self.issnapshot(base)
1586
1586
1587 def snapshotdepth(self, rev):
1587 def snapshotdepth(self, rev):
1588 """number of snapshot in the chain before this one"""
1588 """number of snapshot in the chain before this one"""
1589 if not self.issnapshot(rev):
1589 if not self.issnapshot(rev):
1590 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1590 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1591 return len(self._deltachain(rev)[0]) - 1
1591 return len(self._deltachain(rev)[0]) - 1
1592
1592
1593 def revdiff(self, rev1, rev2):
1593 def revdiff(self, rev1, rev2):
1594 """return or calculate a delta between two revisions
1594 """return or calculate a delta between two revisions
1595
1595
1596 The delta calculated is in binary form and is intended to be written to
1596 The delta calculated is in binary form and is intended to be written to
1597 revlog data directly. So this function needs raw revision data.
1597 revlog data directly. So this function needs raw revision data.
1598 """
1598 """
1599 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1599 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1600 return bytes(self._chunk(rev2))
1600 return bytes(self._chunk(rev2))
1601
1601
1602 return mdiff.textdiff(self.rawdata(rev1),
1602 return mdiff.textdiff(self.rawdata(rev1),
1603 self.rawdata(rev2))
1603 self.rawdata(rev2))
1604
1604
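# Editorial sketch: the binary delta returned by revdiff() applies to the
# *raw* text of rev1 and reproduces the raw text of rev2; 'rl', 'rev1' and
# 'rev2' are hypothetical names.
def _example_apply_revdiff(rl, rev1, rev2):
    from mercurial import mdiff
    delta = rl.revdiff(rev1, rev2)
    rebuilt = mdiff.patches(rl.rawdata(rev1), [delta])
    return bytes(rebuilt) == bytes(rl.rawdata(rev2))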
1605 def _processflags(self, text, flags, operation, raw=False):
1605 def _processflags(self, text, flags, operation, raw=False):
1606 """deprecated entry point to access flag processors"""
1606 """deprecated entry point to access flag processors"""
1607 msg = ('_processflags(...) is deprecated, use the specialized variant')
1607 msg = ('_processflags(...) is deprecated, use the specialized variant')
1608 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1608 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1609 if raw:
1609 if raw:
1610 return text, flagutil.processflagsraw(self, text, flags)
1610 return text, flagutil.processflagsraw(self, text, flags)
1611 elif operation == 'read':
1611 elif operation == 'read':
1612 return flagutil.processflagsread(self, text, flags)
1612 return flagutil.processflagsread(self, text, flags)
1613 else: # write operation
1613 else: # write operation
1614 return flagutil.processflagswrite(self, text, flags)
1614 return flagutil.processflagswrite(self, text, flags)
1615
1615
1616 def revision(self, nodeorrev, _df=None, raw=False):
1616 def revision(self, nodeorrev, _df=None, raw=False):
1617 """return an uncompressed revision of a given node or revision
1617 """return an uncompressed revision of a given node or revision
1618 number.
1618 number.
1619
1619
1620 _df - an existing file handle to read from. (internal-only)
1620 _df - an existing file handle to read from. (internal-only)
1621 raw - an optional argument specifying if the revision data is to be
1621 raw - an optional argument specifying if the revision data is to be
1622 treated as raw data when applying flag transforms. 'raw' should be set
1622 treated as raw data when applying flag transforms. 'raw' should be set
1623 to True when generating changegroups or in debug commands.
1623 to True when generating changegroups or in debug commands.
1624 """
1624 """
1625 if raw:
1625 if raw:
1626 msg = ('revlog.revision(..., raw=True) is deprecated, '
1626 msg = ('revlog.revision(..., raw=True) is deprecated, '
1627 'use revlog.rawdata(...)')
1627 'use revlog.rawdata(...)')
1628 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1628 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1629 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1629 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1630
1630
1631 def sidedata(self, nodeorrev, _df=None):
1631 def sidedata(self, nodeorrev, _df=None):
1632 """a map of extra data related to the changeset but not part of the hash
1632 """a map of extra data related to the changeset but not part of the hash
1633
1633
1634 This function currently returns a dictionary. However, a more advanced
1634 This function currently returns a dictionary. However, a more advanced
1635 mapping object will likely be used in the future for more
1635 mapping object will likely be used in the future for more
1636 efficient/lazy code.
1636 efficient/lazy code.
1637 """
1637 """
1638 return self._revisiondata(nodeorrev, _df)[1]
1638 return self._revisiondata(nodeorrev, _df)[1]
1639
1639
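# Editorial sketch: sidedata() currently hands back a plain dict keyed by
# sidedata key, per the docstring above; 'rl' and 'node' are hypothetical.
def _example_listsidedata(rl, node):
    return sorted(rl.sidedata(node).items())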
1640 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1640 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1641 # deal with <nodeorrev> argument type
1641 # deal with <nodeorrev> argument type
1642 if isinstance(nodeorrev, int):
1642 if isinstance(nodeorrev, int):
1643 rev = nodeorrev
1643 rev = nodeorrev
1644 node = self.node(rev)
1644 node = self.node(rev)
1645 else:
1645 else:
1646 node = nodeorrev
1646 node = nodeorrev
1647 rev = None
1647 rev = None
1648
1648
1649 # fast path the special `nullid` rev
1649 # fast path the special `nullid` rev
1650 if node == nullid:
1650 if node == nullid:
1651 return "", {}
1651 return "", {}
1652
1652
1653 # The text as stored inside the revlog. Might be the revision or might
1653 # The text as stored inside the revlog. Might be the revision or might
1654 # need to be processed to retrieve the revision.
1654 # need to be processed to retrieve the revision.
1655 rawtext = None
1655 rawtext = None
1656
1656
1657 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1657 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1658
1658
1659 if raw and validated:
1659 if raw and validated:
1660 # if we don't want to process the raw text and that raw
1660 # if we don't want to process the raw text and that raw
1661 # text is cached, we can exit early.
1661 # text is cached, we can exit early.
1662 return rawtext, {}
1662 return rawtext, {}
1663 if rev is None:
1663 if rev is None:
1664 rev = self.rev(node)
1664 rev = self.rev(node)
1665 # the revlog's flags for this revision
1665 # the revlog's flags for this revision
1666 # (they usually alter its state or content)
1666 # (they usually alter its state or content)
1667 flags = self.flags(rev)
1667 flags = self.flags(rev)
1668
1668
1669 if validated and flags == REVIDX_DEFAULT_FLAGS:
1669 if validated and flags == REVIDX_DEFAULT_FLAGS:
1670 # no extra flags set, no flag processor runs, text = rawtext
1670 # no extra flags set, no flag processor runs, text = rawtext
1671 return rawtext, {}
1671 return rawtext, {}
1672
1672
1673 sidedata = {}
1673 sidedata = {}
1674 if raw:
1674 if raw:
1675 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1675 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1676 text = rawtext
1676 text = rawtext
1677 else:
1677 else:
1678 try:
1678 r = flagutil.processflagsread(self, rawtext, flags)
1679 r = flagutil.processflagsread(self, rawtext, flags)
1680 except error.SidedataHashError as exc:
1681 msg = _("integrity check failed on %s:%s sidedata key %d")
1682 msg %= (self.indexfile, pycompat.bytestr(rev),
1683 exc.sidedatakey)
1684 raise error.RevlogError(msg)
1679 text, validatehash, sidedata = r
1685 text, validatehash, sidedata = r
1680 if validatehash:
1686 if validatehash:
1681 self.checkhash(text, node, rev=rev)
1687 self.checkhash(text, node, rev=rev)
1682 if not validated:
1688 if not validated:
1683 self._revisioncache = (node, rev, rawtext)
1689 self._revisioncache = (node, rev, rawtext)
1684
1690
1685 return text, sidedata
1691 return text, sidedata
1686
1692
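# Editorial sketch of the error path added above: a sidedata digest mismatch
# raised by the flag processors is rewrapped as a RevlogError whose message
# names the revlog, the revision and the offending sidedata key. 'rl' and
# 'rev' are hypothetical names.
def _example_checked_read(rl, rev):
    from mercurial import error
    try:
        return rl._revisiondata(rev)
    except error.RevlogError:
        return None, {}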
1687 def _rawtext(self, node, rev, _df=None):
1693 def _rawtext(self, node, rev, _df=None):
1688 """return the possibly unvalidated rawtext for a revision
1694 """return the possibly unvalidated rawtext for a revision
1689
1695
1690 returns (rev, rawtext, validated)
1696 returns (rev, rawtext, validated)
1691 """
1697 """
1692
1698
1693 # revision in the cache (could be useful to apply delta)
1699 # revision in the cache (could be useful to apply delta)
1694 cachedrev = None
1700 cachedrev = None
1695 # An intermediate text to apply deltas to
1701 # An intermediate text to apply deltas to
1696 basetext = None
1702 basetext = None
1697
1703
1698 # Check if we have the entry in cache
1704 # Check if we have the entry in cache
1699 # The cache entry looks like (node, rev, rawtext)
1705 # The cache entry looks like (node, rev, rawtext)
1700 if self._revisioncache:
1706 if self._revisioncache:
1701 if self._revisioncache[0] == node:
1707 if self._revisioncache[0] == node:
1702 return (rev, self._revisioncache[2], True)
1708 return (rev, self._revisioncache[2], True)
1703 cachedrev = self._revisioncache[1]
1709 cachedrev = self._revisioncache[1]
1704
1710
1705 if rev is None:
1711 if rev is None:
1706 rev = self.rev(node)
1712 rev = self.rev(node)
1707
1713
1708 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1714 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1709 if stopped:
1715 if stopped:
1710 basetext = self._revisioncache[2]
1716 basetext = self._revisioncache[2]
1711
1717
1712 # drop cache to save memory, the caller is expected to
1718 # drop cache to save memory, the caller is expected to
1713 # update self._revisioncache after validating the text
1719 # update self._revisioncache after validating the text
1714 self._revisioncache = None
1720 self._revisioncache = None
1715
1721
1716 targetsize = None
1722 targetsize = None
1717 rawsize = self.index[rev][2]
1723 rawsize = self.index[rev][2]
1718 if 0 <= rawsize:
1724 if 0 <= rawsize:
1719 targetsize = 4 * rawsize
1725 targetsize = 4 * rawsize
1720
1726
1721 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1727 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1722 if basetext is None:
1728 if basetext is None:
1723 basetext = bytes(bins[0])
1729 basetext = bytes(bins[0])
1724 bins = bins[1:]
1730 bins = bins[1:]
1725
1731
1726 rawtext = mdiff.patches(basetext, bins)
1732 rawtext = mdiff.patches(basetext, bins)
1727 del basetext # let us have a chance to free memory early
1733 del basetext # let us have a chance to free memory early
1728 return (rev, rawtext, False)
1734 return (rev, rawtext, False)
1729
1735
1730 def rawdata(self, nodeorrev, _df=None):
1736 def rawdata(self, nodeorrev, _df=None):
1731 """return an uncompressed raw data of a given node or revision number.
1737 """return an uncompressed raw data of a given node or revision number.
1732
1738
1733 _df - an existing file handle to read from. (internal-only)
1739 _df - an existing file handle to read from. (internal-only)
1734 """
1740 """
1735 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1741 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1736
1742
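# Editorial sketch: rawdata() returns the stored text before any read-time
# flag transforms, while revision() returns the fully processed text; for a
# revision with no extra flags the two match. 'rl' and 'node' are
# hypothetical names.
def _example_raw_vs_processed(rl, node):
    return rl.rawdata(node), rl.revision(node)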
1737 def hash(self, text, p1, p2):
1743 def hash(self, text, p1, p2):
1738 """Compute a node hash.
1744 """Compute a node hash.
1739
1745
1740 Available as a function so that subclasses can replace the hash
1746 Available as a function so that subclasses can replace the hash
1741 as needed.
1747 as needed.
1742 """
1748 """
1743 return storageutil.hashrevisionsha1(text, p1, p2)
1749 return storageutil.hashrevisionsha1(text, p1, p2)
1744
1750
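# Editorial sketch of what storageutil.hashrevisionsha1() computes: SHA-1
# over the two parent nodes in sorted order followed by the revision text.
# This is only an illustration; the real helper also short-circuits null
# parents as an optimization.
def _example_nodehash(text, p1, p2):
    import hashlib
    a, b = sorted((p1, p2))
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()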
1745 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1751 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1746 """Check node hash integrity.
1752 """Check node hash integrity.
1747
1753
1748 Available as a function so that subclasses can extend hash mismatch
1754 Available as a function so that subclasses can extend hash mismatch
1749 behaviors as needed.
1755 behaviors as needed.
1750 """
1756 """
1751 try:
1757 try:
1752 if p1 is None and p2 is None:
1758 if p1 is None and p2 is None:
1753 p1, p2 = self.parents(node)
1759 p1, p2 = self.parents(node)
1754 if node != self.hash(text, p1, p2):
1760 if node != self.hash(text, p1, p2):
1755 # Clear the revision cache on hash failure. The revision cache
1761 # Clear the revision cache on hash failure. The revision cache
1756 # only stores the raw revision and clearing the cache does have
1762 # only stores the raw revision and clearing the cache does have
1757 # the side-effect that we won't have a cache hit when the raw
1763 # the side-effect that we won't have a cache hit when the raw
1758 # revision data is accessed. But this case should be rare and
1764 # revision data is accessed. But this case should be rare and
1759 # it is extra work to teach the cache about the hash
1765 # it is extra work to teach the cache about the hash
1760 # verification state.
1766 # verification state.
1761 if self._revisioncache and self._revisioncache[0] == node:
1767 if self._revisioncache and self._revisioncache[0] == node:
1762 self._revisioncache = None
1768 self._revisioncache = None
1763
1769
1764 revornode = rev
1770 revornode = rev
1765 if revornode is None:
1771 if revornode is None:
1766 revornode = templatefilters.short(hex(node))
1772 revornode = templatefilters.short(hex(node))
1767 raise error.RevlogError(_("integrity check failed on %s:%s")
1773 raise error.RevlogError(_("integrity check failed on %s:%s")
1768 % (self.indexfile, pycompat.bytestr(revornode)))
1774 % (self.indexfile, pycompat.bytestr(revornode)))
1769 except error.RevlogError:
1775 except error.RevlogError:
1770 if self._censorable and storageutil.iscensoredtext(text):
1776 if self._censorable and storageutil.iscensoredtext(text):
1771 raise error.CensoredNodeError(self.indexfile, node, text)
1777 raise error.CensoredNodeError(self.indexfile, node, text)
1772 raise
1778 raise
1773
1779
1774 def _enforceinlinesize(self, tr, fp=None):
1780 def _enforceinlinesize(self, tr, fp=None):
1775 """Check if the revlog is too big for inline and convert if so.
1781 """Check if the revlog is too big for inline and convert if so.
1776
1782
1777 This should be called after revisions are added to the revlog. If the
1783 This should be called after revisions are added to the revlog. If the
1778 revlog has grown too large to be an inline revlog, it will convert it
1784 revlog has grown too large to be an inline revlog, it will convert it
1779 to use multiple index and data files.
1785 to use multiple index and data files.
1780 """
1786 """
1781 tiprev = len(self) - 1
1787 tiprev = len(self) - 1
1782 if (not self._inline or
1788 if (not self._inline or
1783 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1789 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1784 return
1790 return
1785
1791
1786 trinfo = tr.find(self.indexfile)
1792 trinfo = tr.find(self.indexfile)
1787 if trinfo is None:
1793 if trinfo is None:
1788 raise error.RevlogError(_("%s not found in the transaction")
1794 raise error.RevlogError(_("%s not found in the transaction")
1789 % self.indexfile)
1795 % self.indexfile)
1790
1796
1791 trindex = trinfo[2]
1797 trindex = trinfo[2]
1792 if trindex is not None:
1798 if trindex is not None:
1793 dataoff = self.start(trindex)
1799 dataoff = self.start(trindex)
1794 else:
1800 else:
1795 # revlog was stripped at start of transaction, use all leftover data
1801 # revlog was stripped at start of transaction, use all leftover data
1796 trindex = len(self) - 1
1802 trindex = len(self) - 1
1797 dataoff = self.end(tiprev)
1803 dataoff = self.end(tiprev)
1798
1804
1799 tr.add(self.datafile, dataoff)
1805 tr.add(self.datafile, dataoff)
1800
1806
1801 if fp:
1807 if fp:
1802 fp.flush()
1808 fp.flush()
1803 fp.close()
1809 fp.close()
1804 # We can't use the cached file handle after close(). So prevent
1810 # We can't use the cached file handle after close(). So prevent
1805 # its usage.
1811 # its usage.
1806 self._writinghandles = None
1812 self._writinghandles = None
1807
1813
1808 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1814 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1809 for r in self:
1815 for r in self:
1810 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1816 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1811
1817
1812 with self._indexfp('w') as fp:
1818 with self._indexfp('w') as fp:
1813 self.version &= ~FLAG_INLINE_DATA
1819 self.version &= ~FLAG_INLINE_DATA
1814 self._inline = False
1820 self._inline = False
1815 io = self._io
1821 io = self._io
1816 for i in self:
1822 for i in self:
1817 e = io.packentry(self.index[i], self.node, self.version, i)
1823 e = io.packentry(self.index[i], self.node, self.version, i)
1818 fp.write(e)
1824 fp.write(e)
1819
1825
1820 # the temp file replaces the real index when we exit the context
1826 # the temp file replaces the real index when we exit the context
1821 # manager
1827 # manager
1822
1828
1823 tr.replace(self.indexfile, trindex * self._io.size)
1829 tr.replace(self.indexfile, trindex * self._io.size)
1824 self._chunkclear()
1830 self._chunkclear()
1825
1831
1826 def _nodeduplicatecallback(self, transaction, node):
1832 def _nodeduplicatecallback(self, transaction, node):
1827 """called when trying to add a node already stored.
1833 """called when trying to add a node already stored.
1828 """
1834 """
1829
1835
1830 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1836 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1831 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1837 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1832 sidedata=None):
1838 sidedata=None):
1833 """add a revision to the log
1839 """add a revision to the log
1834
1840
1835 text - the revision data to add
1841 text - the revision data to add
1836 transaction - the transaction object used for rollback
1842 transaction - the transaction object used for rollback
1837 link - the linkrev data to add
1843 link - the linkrev data to add
1838 p1, p2 - the parent nodeids of the revision
1844 p1, p2 - the parent nodeids of the revision
1839 cachedelta - an optional precomputed delta
1845 cachedelta - an optional precomputed delta
1840 node - nodeid of revision; typically node is not specified, and it is
1846 node - nodeid of revision; typically node is not specified, and it is
1841 computed by default as hash(text, p1, p2), however subclasses might
1847 computed by default as hash(text, p1, p2), however subclasses might
1842 use different hashing method (and override checkhash() in such case)
1848 use different hashing method (and override checkhash() in such case)
1843 flags - the known flags to set on the revision
1849 flags - the known flags to set on the revision
1844 deltacomputer - an optional deltacomputer instance shared between
1850 deltacomputer - an optional deltacomputer instance shared between
1845 multiple calls
1851 multiple calls
1846 """
1852 """
1847 if link == nullrev:
1853 if link == nullrev:
1848 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1854 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1849 % self.indexfile)
1855 % self.indexfile)
1850
1856
1851 if sidedata is None:
1857 if sidedata is None:
1852 sidedata = {}
1858 sidedata = {}
1853 elif not self.hassidedata:
1859 elif not self.hassidedata:
1854 raise error.ProgrammingError(
1860 raise error.ProgrammingError(
1855 _("trying to add sidedata to a revlog who don't support them")
1861 _("trying to add sidedata to a revlog who don't support them")
1856 )
1862 )
1857
1863
1858 if flags:
1864 if flags:
1859 node = node or self.hash(text, p1, p2)
1865 node = node or self.hash(text, p1, p2)
1860
1866
1861 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1867 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1862 sidedata=sidedata)
1868 sidedata=sidedata)
1863
1869
1864 # If the flag processor modifies the revision data, ignore any provided
1870 # If the flag processor modifies the revision data, ignore any provided
1865 # cachedelta.
1871 # cachedelta.
1866 if rawtext != text:
1872 if rawtext != text:
1867 cachedelta = None
1873 cachedelta = None
1868
1874
1869 if len(rawtext) > _maxentrysize:
1875 if len(rawtext) > _maxentrysize:
1870 raise error.RevlogError(
1876 raise error.RevlogError(
1871 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1877 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1872 % (self.indexfile, len(rawtext)))
1878 % (self.indexfile, len(rawtext)))
1873
1879
1874 node = node or self.hash(rawtext, p1, p2)
1880 node = node or self.hash(rawtext, p1, p2)
1875 if node in self.nodemap:
1881 if node in self.nodemap:
1876 return node
1882 return node
1877
1883
1878 if validatehash:
1884 if validatehash:
1879 self.checkhash(rawtext, node, p1=p1, p2=p2)
1885 self.checkhash(rawtext, node, p1=p1, p2=p2)
1880
1886
1881 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1887 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1882 flags, cachedelta=cachedelta,
1888 flags, cachedelta=cachedelta,
1883 deltacomputer=deltacomputer)
1889 deltacomputer=deltacomputer)
1884
1890
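# Editorial sketch: addrevision() computes the node, runs the write-time flag
# processors and delegates to addrawrevision(); sidedata may only be passed
# when the revlog was opened with sidedata support (hassidedata), otherwise a
# ProgrammingError is raised. All names below are hypothetical.
def _example_addrevision(rl, transaction, linkrev, p1, p2, text, sidedata):
    if sidedata and not rl.hassidedata:
        sidedata = None
    return rl.addrevision(text, transaction, linkrev, p1, p2,
                          sidedata=sidedata)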
1885 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1891 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1886 cachedelta=None, deltacomputer=None):
1892 cachedelta=None, deltacomputer=None):
1887 """add a raw revision with known flags, node and parents
1893 """add a raw revision with known flags, node and parents
1888 useful when reusing a revision not stored in this revlog (e.g. received
1894 useful when reusing a revision not stored in this revlog (e.g. received
1889 over the wire, or read from an external bundle).
1895 over the wire, or read from an external bundle).
1890 """
1896 """
1891 dfh = None
1897 dfh = None
1892 if not self._inline:
1898 if not self._inline:
1893 dfh = self._datafp("a+")
1899 dfh = self._datafp("a+")
1894 ifh = self._indexfp("a+")
1900 ifh = self._indexfp("a+")
1895 try:
1901 try:
1896 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1902 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1897 flags, cachedelta, ifh, dfh,
1903 flags, cachedelta, ifh, dfh,
1898 deltacomputer=deltacomputer)
1904 deltacomputer=deltacomputer)
1899 finally:
1905 finally:
1900 if dfh:
1906 if dfh:
1901 dfh.close()
1907 dfh.close()
1902 ifh.close()
1908 ifh.close()
1903
1909
1904 def compress(self, data):
1910 def compress(self, data):
1905 """Generate a possibly-compressed representation of data."""
1911 """Generate a possibly-compressed representation of data."""
1906 if not data:
1912 if not data:
1907 return '', data
1913 return '', data
1908
1914
1909 compressed = self._compressor.compress(data)
1915 compressed = self._compressor.compress(data)
1910
1916
1911 if compressed:
1917 if compressed:
1912 # The revlog compressor added the header in the returned data.
1918 # The revlog compressor added the header in the returned data.
1913 return '', compressed
1919 return '', compressed
1914
1920
1915 if data[0:1] == '\0':
1921 if data[0:1] == '\0':
1916 return '', data
1922 return '', data
1917 return 'u', data
1923 return 'u', data
1918
1924
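# Editorial sketch: compress() returns a (header, data) pair; the revlog
# stores header + data and decompress() below routes on the first byte, so
# the pair round-trips back to the original chunk. 'rl' and 'chunk' are
# hypothetical names.
def _example_compress_roundtrip(rl, chunk):
    header, packed = rl.compress(chunk)
    return bytes(rl.decompress(header + packed)) == bytes(chunk)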
1919 def decompress(self, data):
1925 def decompress(self, data):
1920 """Decompress a revlog chunk.
1926 """Decompress a revlog chunk.
1921
1927
1922 The chunk is expected to begin with a header identifying the
1928 The chunk is expected to begin with a header identifying the
1923 format type so it can be routed to an appropriate decompressor.
1929 format type so it can be routed to an appropriate decompressor.
1924 """
1930 """
1925 if not data:
1931 if not data:
1926 return data
1932 return data
1927
1933
1928 # Revlogs are read much more frequently than they are written and many
1934 # Revlogs are read much more frequently than they are written and many
1929 # chunks only take microseconds to decompress, so performance is
1935 # chunks only take microseconds to decompress, so performance is
1930 # important here.
1936 # important here.
1931 #
1937 #
1932 # We can make a few assumptions about revlogs:
1938 # We can make a few assumptions about revlogs:
1933 #
1939 #
1934 # 1) the majority of chunks will be compressed (as opposed to inline
1940 # 1) the majority of chunks will be compressed (as opposed to inline
1935 # raw data).
1941 # raw data).
1936 # 2) decompressing *any* data will likely be at least 10x slower than
1942 # 2) decompressing *any* data will likely be at least 10x slower than
1937 # returning raw inline data.
1943 # returning raw inline data.
1938 # 3) we want to prioritize common and officially supported compression
1944 # 3) we want to prioritize common and officially supported compression
1939 # engines
1945 # engines
1940 #
1946 #
1941 # It follows that we want to optimize for "decompress compressed data
1947 # It follows that we want to optimize for "decompress compressed data
1942 # when encoded with common and officially supported compression engines"
1948 # when encoded with common and officially supported compression engines"
1943 # case over "raw data" and "data encoded by less common or non-official
1949 # case over "raw data" and "data encoded by less common or non-official
1944 # compression engines." That is why we have the inline lookup first
1950 # compression engines." That is why we have the inline lookup first
1945 # followed by the compengines lookup.
1951 # followed by the compengines lookup.
1946 #
1952 #
1947 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1953 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1948 # compressed chunks. And this matters for changelog and manifest reads.
1954 # compressed chunks. And this matters for changelog and manifest reads.
1949 t = data[0:1]
1955 t = data[0:1]
1950
1956
1951 if t == 'x':
1957 if t == 'x':
1952 try:
1958 try:
1953 return _zlibdecompress(data)
1959 return _zlibdecompress(data)
1954 except zlib.error as e:
1960 except zlib.error as e:
1955 raise error.RevlogError(_('revlog decompress error: %s') %
1961 raise error.RevlogError(_('revlog decompress error: %s') %
1956 stringutil.forcebytestr(e))
1962 stringutil.forcebytestr(e))
1957 # '\0' is more common than 'u' so it goes first.
1963 # '\0' is more common than 'u' so it goes first.
1958 elif t == '\0':
1964 elif t == '\0':
1959 return data
1965 return data
1960 elif t == 'u':
1966 elif t == 'u':
1961 return util.buffer(data, 1)
1967 return util.buffer(data, 1)
1962
1968
1963 try:
1969 try:
1964 compressor = self._decompressors[t]
1970 compressor = self._decompressors[t]
1965 except KeyError:
1971 except KeyError:
1966 try:
1972 try:
1967 engine = util.compengines.forrevlogheader(t)
1973 engine = util.compengines.forrevlogheader(t)
1968 compressor = engine.revlogcompressor(self._compengineopts)
1974 compressor = engine.revlogcompressor(self._compengineopts)
1969 self._decompressors[t] = compressor
1975 self._decompressors[t] = compressor
1970 except KeyError:
1976 except KeyError:
1971 raise error.RevlogError(_('unknown compression type %r') % t)
1977 raise error.RevlogError(_('unknown compression type %r') % t)
1972
1978
1973 return compressor.decompress(data)
1979 return compressor.decompress(data)
1974
1980
1975 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1981 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1976 cachedelta, ifh, dfh, alwayscache=False,
1982 cachedelta, ifh, dfh, alwayscache=False,
1977 deltacomputer=None):
1983 deltacomputer=None):
1978 """internal function to add revisions to the log
1984 """internal function to add revisions to the log
1979
1985
1980 see addrevision for argument descriptions.
1986 see addrevision for argument descriptions.
1981
1987
1982 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1988 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1983
1989
1984 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1990 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1985 be used.
1991 be used.
1986
1992
1987 invariants:
1993 invariants:
1988 - rawtext is optional (can be None); if not set, cachedelta must be set.
1994 - rawtext is optional (can be None); if not set, cachedelta must be set.
1989 if both are set, they must correspond to each other.
1995 if both are set, they must correspond to each other.
1990 """
1996 """
1991 if node == nullid:
1997 if node == nullid:
1992 raise error.RevlogError(_("%s: attempt to add null revision") %
1998 raise error.RevlogError(_("%s: attempt to add null revision") %
1993 self.indexfile)
1999 self.indexfile)
1994 if node == wdirid or node in wdirfilenodeids:
2000 if node == wdirid or node in wdirfilenodeids:
1995 raise error.RevlogError(_("%s: attempt to add wdir revision") %
2001 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1996 self.indexfile)
2002 self.indexfile)
1997
2003
1998 if self._inline:
2004 if self._inline:
1999 fh = ifh
2005 fh = ifh
2000 else:
2006 else:
2001 fh = dfh
2007 fh = dfh
2002
2008
2003 btext = [rawtext]
2009 btext = [rawtext]
2004
2010
2005 curr = len(self)
2011 curr = len(self)
2006 prev = curr - 1
2012 prev = curr - 1
2007 offset = self.end(prev)
2013 offset = self.end(prev)
2008 p1r, p2r = self.rev(p1), self.rev(p2)
2014 p1r, p2r = self.rev(p1), self.rev(p2)
2009
2015
2010 # full versions are inserted when the needed deltas
2016 # full versions are inserted when the needed deltas
2011 # become comparable to the uncompressed text
2017 # become comparable to the uncompressed text
2012 if rawtext is None:
2018 if rawtext is None:
2013 # need the rawtext size, before it is changed by flag processors, which is
2019 # need the rawtext size, before it is changed by flag processors, which is
2014 # the non-raw size. use revlog explicitly to avoid filelog's extra
2020 # the non-raw size. use revlog explicitly to avoid filelog's extra
2015 # logic that might remove metadata size.
2021 # logic that might remove metadata size.
2016 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2022 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2017 cachedelta[1])
2023 cachedelta[1])
2018 else:
2024 else:
2019 textlen = len(rawtext)
2025 textlen = len(rawtext)
2020
2026
2021 if deltacomputer is None:
2027 if deltacomputer is None:
2022 deltacomputer = deltautil.deltacomputer(self)
2028 deltacomputer = deltautil.deltacomputer(self)
2023
2029
2024 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2030 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2025
2031
2026 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2032 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2027
2033
2028 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2034 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2029 deltainfo.base, link, p1r, p2r, node)
2035 deltainfo.base, link, p1r, p2r, node)
2030 self.index.append(e)
2036 self.index.append(e)
2031 self.nodemap[node] = curr
2037 self.nodemap[node] = curr
2032
2038
2033 # Reset the pure node cache start lookup offset to account for new
2039 # Reset the pure node cache start lookup offset to account for new
2034 # revision.
2040 # revision.
2035 if self._nodepos is not None:
2041 if self._nodepos is not None:
2036 self._nodepos = curr
2042 self._nodepos = curr
2037
2043
2038 entry = self._io.packentry(e, self.node, self.version, curr)
2044 entry = self._io.packentry(e, self.node, self.version, curr)
2039 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2045 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2040 link, offset)
2046 link, offset)
2041
2047
2042 rawtext = btext[0]
2048 rawtext = btext[0]
2043
2049
2044 if alwayscache and rawtext is None:
2050 if alwayscache and rawtext is None:
2045 rawtext = deltacomputer.buildtext(revinfo, fh)
2051 rawtext = deltacomputer.buildtext(revinfo, fh)
2046
2052
2047 if type(rawtext) == bytes: # only accept immutable objects
2053 if type(rawtext) == bytes: # only accept immutable objects
2048 self._revisioncache = (node, curr, rawtext)
2054 self._revisioncache = (node, curr, rawtext)
2049 self._chainbasecache[curr] = deltainfo.chainbase
2055 self._chainbasecache[curr] = deltainfo.chainbase
2050 return node
2056 return node
2051
2057
2052 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2058 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2053 # Files opened in a+ mode have inconsistent behavior on various
2059 # Files opened in a+ mode have inconsistent behavior on various
2054 # platforms. Windows requires that a file positioning call be made
2060 # platforms. Windows requires that a file positioning call be made
2055 # when the file handle transitions between reads and writes. See
2061 # when the file handle transitions between reads and writes. See
2056 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2062 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2057 # platforms, Python or the platform itself can be buggy. Some versions
2063 # platforms, Python or the platform itself can be buggy. Some versions
2058 # of Solaris have been observed to not append at the end of the file
2064 # of Solaris have been observed to not append at the end of the file
2059 # if the file was seeked to before the end. See issue4943 for more.
2065 # if the file was seeked to before the end. See issue4943 for more.
2060 #
2066 #
2061 # We work around this issue by inserting a seek() before writing.
2067 # We work around this issue by inserting a seek() before writing.
2062 # Note: This is likely not necessary on Python 3. However, because
2068 # Note: This is likely not necessary on Python 3. However, because
2063 # the file handle is reused for reads and may be seeked there, we need
2069 # the file handle is reused for reads and may be seeked there, we need
2064 # to be careful before changing this.
2070 # to be careful before changing this.
2065 ifh.seek(0, os.SEEK_END)
2071 ifh.seek(0, os.SEEK_END)
2066 if dfh:
2072 if dfh:
2067 dfh.seek(0, os.SEEK_END)
2073 dfh.seek(0, os.SEEK_END)
2068
2074
2069 curr = len(self) - 1
2075 curr = len(self) - 1
2070 if not self._inline:
2076 if not self._inline:
2071 transaction.add(self.datafile, offset)
2077 transaction.add(self.datafile, offset)
2072 transaction.add(self.indexfile, curr * len(entry))
2078 transaction.add(self.indexfile, curr * len(entry))
2073 if data[0]:
2079 if data[0]:
2074 dfh.write(data[0])
2080 dfh.write(data[0])
2075 dfh.write(data[1])
2081 dfh.write(data[1])
2076 ifh.write(entry)
2082 ifh.write(entry)
2077 else:
2083 else:
2078 offset += curr * self._io.size
2084 offset += curr * self._io.size
2079 transaction.add(self.indexfile, offset, curr)
2085 transaction.add(self.indexfile, offset, curr)
2080 ifh.write(entry)
2086 ifh.write(entry)
2081 ifh.write(data[0])
2087 ifh.write(data[0])
2082 ifh.write(data[1])
2088 ifh.write(data[1])
2083 self._enforceinlinesize(transaction, ifh)
2089 self._enforceinlinesize(transaction, ifh)
2084
2090
2085 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2091 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2086 """
2092 """
2087 add a delta group
2093 add a delta group
2088
2094
2089 given a set of deltas, add them to the revision log. the
2095 given a set of deltas, add them to the revision log. the
2090 first delta is against its parent, which should be in our
2096 first delta is against its parent, which should be in our
2091 log, the rest are against the previous delta.
2097 log, the rest are against the previous delta.
2092
2098
2093 If ``addrevisioncb`` is defined, it will be called with arguments of
2099 If ``addrevisioncb`` is defined, it will be called with arguments of
2094 this revlog and the node that was added.
2100 this revlog and the node that was added.
2095 """
2101 """
2096
2102
2097 if self._writinghandles:
2103 if self._writinghandles:
2098 raise error.ProgrammingError('cannot nest addgroup() calls')
2104 raise error.ProgrammingError('cannot nest addgroup() calls')
2099
2105
2100 nodes = []
2106 nodes = []
2101
2107
2102 r = len(self)
2108 r = len(self)
2103 end = 0
2109 end = 0
2104 if r:
2110 if r:
2105 end = self.end(r - 1)
2111 end = self.end(r - 1)
2106 ifh = self._indexfp("a+")
2112 ifh = self._indexfp("a+")
2107 isize = r * self._io.size
2113 isize = r * self._io.size
2108 if self._inline:
2114 if self._inline:
2109 transaction.add(self.indexfile, end + isize, r)
2115 transaction.add(self.indexfile, end + isize, r)
2110 dfh = None
2116 dfh = None
2111 else:
2117 else:
2112 transaction.add(self.indexfile, isize, r)
2118 transaction.add(self.indexfile, isize, r)
2113 transaction.add(self.datafile, end)
2119 transaction.add(self.datafile, end)
2114 dfh = self._datafp("a+")
2120 dfh = self._datafp("a+")
2115 def flush():
2121 def flush():
2116 if dfh:
2122 if dfh:
2117 dfh.flush()
2123 dfh.flush()
2118 ifh.flush()
2124 ifh.flush()
2119
2125
2120 self._writinghandles = (ifh, dfh)
2126 self._writinghandles = (ifh, dfh)
2121
2127
2122 try:
2128 try:
2123 deltacomputer = deltautil.deltacomputer(self)
2129 deltacomputer = deltautil.deltacomputer(self)
2124 # loop through our set of deltas
2130 # loop through our set of deltas
2125 for data in deltas:
2131 for data in deltas:
2126 node, p1, p2, linknode, deltabase, delta, flags = data
2132 node, p1, p2, linknode, deltabase, delta, flags = data
2127 link = linkmapper(linknode)
2133 link = linkmapper(linknode)
2128 flags = flags or REVIDX_DEFAULT_FLAGS
2134 flags = flags or REVIDX_DEFAULT_FLAGS
2129
2135
2130 nodes.append(node)
2136 nodes.append(node)
2131
2137
2132 if node in self.nodemap:
2138 if node in self.nodemap:
2133 self._nodeduplicatecallback(transaction, node)
2139 self._nodeduplicatecallback(transaction, node)
2134 # this can happen if two branches make the same change
2140 # this can happen if two branches make the same change
2135 continue
2141 continue
2136
2142
2137 for p in (p1, p2):
2143 for p in (p1, p2):
2138 if p not in self.nodemap:
2144 if p not in self.nodemap:
2139 raise error.LookupError(p, self.indexfile,
2145 raise error.LookupError(p, self.indexfile,
2140 _('unknown parent'))
2146 _('unknown parent'))
2141
2147
2142 if deltabase not in self.nodemap:
2148 if deltabase not in self.nodemap:
2143 raise error.LookupError(deltabase, self.indexfile,
2149 raise error.LookupError(deltabase, self.indexfile,
2144 _('unknown delta base'))
2150 _('unknown delta base'))
2145
2151
2146 baserev = self.rev(deltabase)
2152 baserev = self.rev(deltabase)
2147
2153
2148 if baserev != nullrev and self.iscensored(baserev):
2154 if baserev != nullrev and self.iscensored(baserev):
2149 # if base is censored, delta must be full replacement in a
2155 # if base is censored, delta must be full replacement in a
2150 # single patch operation
2156 # single patch operation
2151 hlen = struct.calcsize(">lll")
2157 hlen = struct.calcsize(">lll")
2152 oldlen = self.rawsize(baserev)
2158 oldlen = self.rawsize(baserev)
2153 newlen = len(delta) - hlen
2159 newlen = len(delta) - hlen
2154 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2160 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2155 raise error.CensoredBaseError(self.indexfile,
2161 raise error.CensoredBaseError(self.indexfile,
2156 self.node(baserev))
2162 self.node(baserev))
2157
2163
2158 if not flags and self._peek_iscensored(baserev, delta, flush):
2164 if not flags and self._peek_iscensored(baserev, delta, flush):
2159 flags |= REVIDX_ISCENSORED
2165 flags |= REVIDX_ISCENSORED
2160
2166
2161 # We assume consumers of addrevisioncb will want to retrieve
2167 # We assume consumers of addrevisioncb will want to retrieve
2162 # the added revision, which will require a call to
2168 # the added revision, which will require a call to
2163 # revision(). revision() will fast path if there is a cache
2169 # revision(). revision() will fast path if there is a cache
2164 # hit. So, we tell _addrevision() to always cache in this case.
2170 # hit. So, we tell _addrevision() to always cache in this case.
2165 # We're only using addgroup() in the context of changegroup
2171 # We're only using addgroup() in the context of changegroup
2166 # generation so the revision data can always be handled as raw
2172 # generation so the revision data can always be handled as raw
2167 # by the flagprocessor.
2173 # by the flagprocessor.
2168 self._addrevision(node, None, transaction, link,
2174 self._addrevision(node, None, transaction, link,
2169 p1, p2, flags, (baserev, delta),
2175 p1, p2, flags, (baserev, delta),
2170 ifh, dfh,
2176 ifh, dfh,
2171 alwayscache=bool(addrevisioncb),
2177 alwayscache=bool(addrevisioncb),
2172 deltacomputer=deltacomputer)
2178 deltacomputer=deltacomputer)
2173
2179
2174 if addrevisioncb:
2180 if addrevisioncb:
2175 addrevisioncb(self, node)
2181 addrevisioncb(self, node)
2176
2182
2177 if not dfh and not self._inline:
2183 if not dfh and not self._inline:
2178 # addrevision switched from inline to conventional
2184 # addrevision switched from inline to conventional
2179 # reopen the index
2185 # reopen the index
2180 ifh.close()
2186 ifh.close()
2181 dfh = self._datafp("a+")
2187 dfh = self._datafp("a+")
2182 ifh = self._indexfp("a+")
2188 ifh = self._indexfp("a+")
2183 self._writinghandles = (ifh, dfh)
2189 self._writinghandles = (ifh, dfh)
2184 finally:
2190 finally:
2185 self._writinghandles = None
2191 self._writinghandles = None
2186
2192
2187 if dfh:
2193 if dfh:
2188 dfh.close()
2194 dfh.close()
2189 ifh.close()
2195 ifh.close()
2190
2196
2191 return nodes
2197 return nodes
2192
2198
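# Editorial sketch: each entry consumed by addgroup() is the 7-tuple unpacked
# in the loop above, and the linkmapper turns a linknode into a link
# revision. 'rl', 'transaction', 'entries' and 'linkrevs' are hypothetical.
def _example_addgroup(rl, transaction, entries, linkrevs):
    deltas = ((node, p1, p2, linknode, deltabase, delta, 0)
              for node, p1, p2, linknode, deltabase, delta in entries)
    return rl.addgroup(deltas, lambda linknode: linkrevs[linknode],
                       transaction)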
2193 def iscensored(self, rev):
2199 def iscensored(self, rev):
2194 """Check if a file revision is censored."""
2200 """Check if a file revision is censored."""
2195 if not self._censorable:
2201 if not self._censorable:
2196 return False
2202 return False
2197
2203
2198 return self.flags(rev) & REVIDX_ISCENSORED
2204 return self.flags(rev) & REVIDX_ISCENSORED
2199
2205
2200 def _peek_iscensored(self, baserev, delta, flush):
2206 def _peek_iscensored(self, baserev, delta, flush):
2201 """Quickly check if a delta produces a censored revision."""
2207 """Quickly check if a delta produces a censored revision."""
2202 if not self._censorable:
2208 if not self._censorable:
2203 return False
2209 return False
2204
2210
2205 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2211 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2206
2212
2207 def getstrippoint(self, minlink):
2213 def getstrippoint(self, minlink):
2208 """find the minimum rev that must be stripped to strip the linkrev
2214 """find the minimum rev that must be stripped to strip the linkrev
2209
2215
2210 Returns a tuple containing the minimum rev and a set of all revs that
2216 Returns a tuple containing the minimum rev and a set of all revs that
2211 have linkrevs that will be broken by this strip.
2217 have linkrevs that will be broken by this strip.
2212 """
2218 """
2213 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2219 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2214 self.headrevs(),
2220 self.headrevs(),
2215 self.linkrev, self.parentrevs)
2221 self.linkrev, self.parentrevs)
2216
2222
2217 def strip(self, minlink, transaction):
2223 def strip(self, minlink, transaction):
2218 """truncate the revlog on the first revision with a linkrev >= minlink
2224 """truncate the revlog on the first revision with a linkrev >= minlink
2219
2225
2220 This function is called when we're stripping revision minlink and
2226 This function is called when we're stripping revision minlink and
2221 its descendants from the repository.
2227 its descendants from the repository.
2222
2228
2223 We have to remove all revisions with linkrev >= minlink, because
2229 We have to remove all revisions with linkrev >= minlink, because
2224 the equivalent changelog revisions will be renumbered after the
2230 the equivalent changelog revisions will be renumbered after the
2225 strip.
2231 strip.
2226
2232
2227 So we truncate the revlog on the first of these revisions, and
2233 So we truncate the revlog on the first of these revisions, and
2228 trust that the caller has saved the revisions that shouldn't be
2234 trust that the caller has saved the revisions that shouldn't be
2229 removed and that it'll re-add them after this truncation.
2235 removed and that it'll re-add them after this truncation.
2230 """
2236 """
2231 if len(self) == 0:
2237 if len(self) == 0:
2232 return
2238 return
2233
2239
2234 rev, _ = self.getstrippoint(minlink)
2240 rev, _ = self.getstrippoint(minlink)
2235 if rev == len(self):
2241 if rev == len(self):
2236 return
2242 return
2237
2243
2238 # first truncate the files on disk
2244 # first truncate the files on disk
2239 end = self.start(rev)
2245 end = self.start(rev)
2240 if not self._inline:
2246 if not self._inline:
2241 transaction.add(self.datafile, end)
2247 transaction.add(self.datafile, end)
2242 end = rev * self._io.size
2248 end = rev * self._io.size
2243 else:
2249 else:
2244 end += rev * self._io.size
2250 end += rev * self._io.size
2245
2251
2246 transaction.add(self.indexfile, end)
2252 transaction.add(self.indexfile, end)
2247
2253
2248 # then reset internal state in memory to forget those revisions
2254 # then reset internal state in memory to forget those revisions
2249 self._revisioncache = None
2255 self._revisioncache = None
2250 self._chaininfocache = {}
2256 self._chaininfocache = {}
2251 self._chunkclear()
2257 self._chunkclear()
2252 for x in pycompat.xrange(rev, len(self)):
2258 for x in pycompat.xrange(rev, len(self)):
2253 del self.nodemap[self.node(x)]
2259 del self.nodemap[self.node(x)]
2254
2260
2255 del self.index[rev:-1]
2261 del self.index[rev:-1]
2256 self._nodepos = None
2262 self._nodepos = None
2257
2263
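# Editorial sketch: getstrippoint() reports which revision strip() will
# truncate at and which linkrevs get broken; callers are expected to save and
# re-add the revisions that should survive, as the strip() docstring above
# notes. 'rl', 'minlink' and 'transaction' are hypothetical names.
def _example_strip(rl, minlink, transaction):
    striprev, brokenrevs = rl.getstrippoint(minlink)
    if striprev < len(rl):
        rl.strip(minlink, transaction)
    return striprev, brokenrevs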
2258 def checksize(self):
2264 def checksize(self):
2259 """Check size of index and data files
2265 """Check size of index and data files
2260
2266
2261 return a (dd, di) tuple.
2267 return a (dd, di) tuple.
2262 - dd: extra bytes for the "data" file
2268 - dd: extra bytes for the "data" file
2263 - di: extra bytes for the "index" file
2269 - di: extra bytes for the "index" file
2264
2270
2265 A healthy revlog will return (0, 0).
2271 A healthy revlog will return (0, 0).
2266 """
2272 """
2267 expected = 0
2273 expected = 0
2268 if len(self):
2274 if len(self):
2269 expected = max(0, self.end(len(self) - 1))
2275 expected = max(0, self.end(len(self) - 1))
2270
2276
2271 try:
2277 try:
2272 with self._datafp() as f:
2278 with self._datafp() as f:
2273 f.seek(0, io.SEEK_END)
2279 f.seek(0, io.SEEK_END)
2274 actual = f.tell()
2280 actual = f.tell()
2275 dd = actual - expected
2281 dd = actual - expected
2276 except IOError as inst:
2282 except IOError as inst:
2277 if inst.errno != errno.ENOENT:
2283 if inst.errno != errno.ENOENT:
2278 raise
2284 raise
2279 dd = 0
2285 dd = 0
2280
2286
2281 try:
2287 try:
2282 f = self.opener(self.indexfile)
2288 f = self.opener(self.indexfile)
2283 f.seek(0, io.SEEK_END)
2289 f.seek(0, io.SEEK_END)
2284 actual = f.tell()
2290 actual = f.tell()
2285 f.close()
2291 f.close()
2286 s = self._io.size
2292 s = self._io.size
2287 i = max(0, actual // s)
2293 i = max(0, actual // s)
2288 di = actual - (i * s)
2294 di = actual - (i * s)
2289 if self._inline:
2295 if self._inline:
2290 databytes = 0
2296 databytes = 0
2291 for r in self:
2297 for r in self:
2292 databytes += max(0, self.length(r))
2298 databytes += max(0, self.length(r))
2293 dd = 0
2299 dd = 0
2294 di = actual - len(self) * s - databytes
2300 di = actual - len(self) * s - databytes
2295 except IOError as inst:
2301 except IOError as inst:
2296 if inst.errno != errno.ENOENT:
2302 if inst.errno != errno.ENOENT:
2297 raise
2303 raise
2298 di = 0
2304 di = 0
2299
2305
2300 return (dd, di)
2306 return (dd, di)
2301
2307
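# Editorial sketch: per the checksize() docstring above, a healthy revlog
# reports no trailing bytes in either its index or data file. 'rl' is a
# hypothetical revlog instance.
def _example_ishealthy(rl):
    dd, di = rl.checksize()
    return (dd, di) == (0, 0)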
2302 def files(self):
2308 def files(self):
2303 res = [self.indexfile]
2309 res = [self.indexfile]
2304 if not self._inline:
2310 if not self._inline:
2305 res.append(self.datafile)
2311 res.append(self.datafile)
2306 return res
2312 return res
2307
2313
2308 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2314 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2309 assumehaveparentrevisions=False,
2315 assumehaveparentrevisions=False,
2310 deltamode=repository.CG_DELTAMODE_STD):
2316 deltamode=repository.CG_DELTAMODE_STD):
2311 if nodesorder not in ('nodes', 'storage', 'linear', None):
2317 if nodesorder not in ('nodes', 'storage', 'linear', None):
2312 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2318 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2313 nodesorder)
2319 nodesorder)
2314
2320
2315 if nodesorder is None and not self._generaldelta:
2321 if nodesorder is None and not self._generaldelta:
2316 nodesorder = 'storage'
2322 nodesorder = 'storage'
2317
2323
2318 if (not self._storedeltachains and
2324 if (not self._storedeltachains and
2319 deltamode != repository.CG_DELTAMODE_PREV):
2325 deltamode != repository.CG_DELTAMODE_PREV):
2320 deltamode = repository.CG_DELTAMODE_FULL
2326 deltamode = repository.CG_DELTAMODE_FULL
2321
2327
2322 return storageutil.emitrevisions(
2328 return storageutil.emitrevisions(
2323 self, nodes, nodesorder, revlogrevisiondelta,
2329 self, nodes, nodesorder, revlogrevisiondelta,
2324 deltaparentfn=self.deltaparent,
2330 deltaparentfn=self.deltaparent,
2325 candeltafn=self.candelta,
2331 candeltafn=self.candelta,
2326 rawsizefn=self.rawsize,
2332 rawsizefn=self.rawsize,
2327 revdifffn=self.revdiff,
2333 revdifffn=self.revdiff,
2328 flagsfn=self.flags,
2334 flagsfn=self.flags,
2329 deltamode=deltamode,
2335 deltamode=deltamode,
2330 revisiondata=revisiondata,
2336 revisiondata=revisiondata,
2331 assumehaveparentrevisions=assumehaveparentrevisions)
2337 assumehaveparentrevisions=assumehaveparentrevisions)
2332
2338
2333 DELTAREUSEALWAYS = 'always'
2339 DELTAREUSEALWAYS = 'always'
2334 DELTAREUSESAMEREVS = 'samerevs'
2340 DELTAREUSESAMEREVS = 'samerevs'
2335 DELTAREUSENEVER = 'never'
2341 DELTAREUSENEVER = 'never'
2336
2342
2337 DELTAREUSEFULLADD = 'fulladd'
2343 DELTAREUSEFULLADD = 'fulladd'
2338
2344
2339 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2345 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2340
2346
2341 def clone(self, tr, destrevlog, addrevisioncb=None,
2347 def clone(self, tr, destrevlog, addrevisioncb=None,
2342 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2348 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2343 """Copy this revlog to another, possibly with format changes.
2349 """Copy this revlog to another, possibly with format changes.
2344
2350
2345 The destination revlog will contain the same revisions and nodes.
2351 The destination revlog will contain the same revisions and nodes.
2346 However, it may not be bit-for-bit identical due to e.g. delta encoding
2352 However, it may not be bit-for-bit identical due to e.g. delta encoding
2347 differences.
2353 differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
            Deltas will always be reused (if possible), even if the destination
            revlog would not select the same revisions for the delta. This is
            the fastest mode of operation.
        DELTAREUSESAMEREVS
            Deltas will be reused if the destination revlog would pick the same
            revisions for the delta. This mode strikes a balance between speed
            and optimization.
        DELTAREUSENEVER
            Deltas will never be reused. This is the slowest mode of execution.
            This mode can be used to recompute deltas (e.g. if the diff/delta
            algorithm changes).
        DELTAREUSEFULLADD
            Revisions will be re-added as if they were new content. This is
            slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
            e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents`` argument
        controls whether deltas must be computed against both parents for
        merges. If ``None``, the destination revlog's current setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(tr, destrevlog, addrevisioncb, deltareuse,
                        forcedeltabothparents)

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
               forcedeltabothparents):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xffff
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self.revision(rev)
                destrevlog.addrevision(text, tr, linkrev, p1, p2,
                                       cachedelta=cachedelta,
                                       node=node, flags=flags,
                                       deltacomputer=deltacomputer)
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                        checkambig=False)
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                try:
                    destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                            p2, flags, cachedelta, ifh, dfh,
                                            deltacomputer=deltacomputer)
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
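
Illustrative note (not part of this change): a caller that wants to rewrite a revlog while recomputing every delta could use the ``clone`` API documented above roughly as follows. The helper name and the way ``tr``, ``srcrevlog`` and ``destrevlog`` are obtained are assumptions made for the example.

def recomputedeltas(tr, srcrevlog, destrevlog):
    # hypothetical helper: copy `srcrevlog` into an empty `destrevlog`,
    # forcing the destination to recompute every delta
    def onrevision(rl, rev, node):
        pass  # e.g. report progress; called once per copied revision
    srcrevlog.clone(tr, destrevlog,
                    addrevisioncb=onrevision,
                    deltareuse=srcrevlog.DELTAREUSENEVER)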
@@ -1,34 +1,63
# sidedata.py - Logic around storing extra data alongside revlog revisions
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
"""core code for "sidedata" support

The "sidedata" are stored alongside the revision without actually being part
of its content and without affecting its hash. Its main use case is to cache
important information related to a changeset.

The current implementation is experimental and subject to changes. Do not rely
on it in production.

Sidedata are stored in the revlog itself, within the revision rawtext. They
are inserted into and removed from it using the flagprocessors mechanism. The
following format is currently used::

    initial header:
        <number of sidedata; 2 bytes>
    sidedata (repeated N times):
        <sidedata-key; 2 bytes>
        <sidedata-entry-length: 4 bytes>
        <sidedata-content-sha1-digest: 20 bytes>
        <sidedata-content; X bytes>
    normal raw text:
        <all bytes remaining in the rawtext>

This is a simple and effective format. It should be enough to experiment with
the concept.
"""

from __future__ import absolute_import

import hashlib
import struct

from .. import error

SIDEDATA_HEADER = struct.Struct('>H')
SIDEDATA_ENTRY = struct.Struct('>HL20s')

def sidedatareadprocessor(rl, text):
    sidedata = {}
    offset = 0
    # the initial header gives the number of sidedata entries
    nbentry, = SIDEDATA_HEADER.unpack(text[:SIDEDATA_HEADER.size])
    offset += SIDEDATA_HEADER.size
    dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
    for i in range(nbentry):
        nextoffset = offset + SIDEDATA_ENTRY.size
        key, size, storeddigest = SIDEDATA_ENTRY.unpack(text[offset:nextoffset])
        offset = nextoffset
        # read the data associated with that entry
        nextdataoffset = dataoffset + size
        entrytext = text[dataoffset:nextdataoffset]
        readdigest = hashlib.sha1(entrytext).digest()
        if storeddigest != readdigest:
            raise error.SidedataHashError(key, storeddigest, readdigest)
        sidedata[key] = entrytext
        dataoffset = nextdataoffset
    # whatever follows the sidedata block is the regular revision rawtext
    text = text[dataoffset:]
    return text, True, sidedata
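
For context, the write side is not part of this hunk. A minimal sketch of how a sidedata mapping could be serialized in front of a raw text, following the format described in the module docstring, might look like the following; the helper name ``packsidedata`` and its standalone signature are illustrative only.

def packsidedata(sidedata, text):
    # illustrative sketch, not part of this change: build the rawtext layout
    # documented above (header, fixed-size entries, the entry payloads in the
    # same order, then the untouched revision text)
    entries = []
    payloads = []
    for key in sorted(sidedata):
        payload = sidedata[key]
        digest = hashlib.sha1(payload).digest()
        entries.append(SIDEDATA_ENTRY.pack(key, len(payload), digest))
        payloads.append(payload)
    header = SIDEDATA_HEADER.pack(len(sidedata))
    return b''.join([header] + entries + payloads + [text])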