sidedata: add a function to read sidedata from revlog raw text...
marmoute
r43302:294afb98 default
--- a/mercurial/error.py
+++ b/mercurial/error.py
@@ -1,336 +1,343 @@
1 1 # error.py - Mercurial exceptions
2 2 #
3 3 # Copyright 2005-2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Mercurial exceptions.
9 9
10 10 This allows us to catch exceptions at higher levels without forcing
11 11 imports.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 # Do not import anything but pycompat here, please
17 17 from . import pycompat
18 18
19 19 def _tobytes(exc):
20 20 """Byte-stringify exception in the same way as BaseException_str()"""
21 21 if not exc.args:
22 22 return b''
23 23 if len(exc.args) == 1:
24 24 return pycompat.bytestr(exc.args[0])
25 25 return b'(%s)' % b', '.join(b"'%s'" % pycompat.bytestr(a) for a in exc.args)
26 26
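Illustration (not part of the diff): the output shapes `_tobytes` produces for zero, one, and several exception arguments, assuming bytes args.

# Editorial sketch: how _tobytes renders exception arguments.
_tobytes(ValueError())            # -> b''
_tobytes(ValueError(b'boom'))     # -> b'boom'
_tobytes(ValueError(b'a', b'b'))  # -> b"('a', 'b')"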
27 27 class Hint(object):
28 28 """Mix-in to provide a hint of an error
29 29
30 30 This should come first in the inheritance list to consume a hint and
31 31 pass remaining arguments to the exception class.
32 32 """
33 33 def __init__(self, *args, **kw):
34 34 self.hint = kw.pop(r'hint', None)
35 35 super(Hint, self).__init__(*args, **kw)
36 36
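Illustration (not part of the diff): the mix-in pops the `hint` keyword before the rest of the arguments reach the exception class, so subclasses such as `Abort` below can carry an optional hint.

# Editorial sketch: Hint consumes 'hint' and delegates the rest.
try:
    raise Abort(b'push failed', hint=b'see hg help push')
except Abort as e:
    assert e.hint == b'see hg help push'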
37 37 class StorageError(Hint, Exception):
38 38 """Raised when an error occurs in a storage layer.
39 39
40 40 Usually subclassed by a storage-specific exception.
41 41 """
42 42 __bytes__ = _tobytes
43 43
44 44 class RevlogError(StorageError):
45 45 __bytes__ = _tobytes
46 46
47 class SidedataHashError(RevlogError):
48
49 def __init__(self, key, expected, got):
50 self.sidedatakey = key
51 self.expecteddigest = expected
52 self.actualdigest = got
53
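Illustration (not part of the diff): how the new exception could surface from sidedata verification. The helper name and the choice of SHA-1 here are assumptions for the sketch, not the actual implementation.

# Editorial sketch (hypothetical helper and hash choice):
import hashlib

def _checksidedata(key, data, expected):
    got = hashlib.sha1(data).digest()
    if got != expected:
        raise SidedataHashError(key, expected, got)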
47 54 class FilteredIndexError(IndexError):
48 55 __bytes__ = _tobytes
49 56
50 57 class LookupError(RevlogError, KeyError):
51 58 def __init__(self, name, index, message):
52 59 self.name = name
53 60 self.index = index
54 61 # this can't be called 'message' because at least some installs of
55 62 # Python 2.6+ complain about the 'message' property being deprecated
56 63 self.lookupmessage = message
57 64 if isinstance(name, bytes) and len(name) == 20:
58 65 from .node import short
59 66 name = short(name)
60 67 RevlogError.__init__(self, '%s@%s: %s' % (index, name, message))
61 68
62 69 def __bytes__(self):
63 70 return RevlogError.__bytes__(self)
64 71
65 72 def __str__(self):
66 73 return RevlogError.__str__(self)
67 74
68 75 class AmbiguousPrefixLookupError(LookupError):
69 76 pass
70 77
71 78 class FilteredLookupError(LookupError):
72 79 pass
73 80
74 81 class ManifestLookupError(LookupError):
75 82 pass
76 83
77 84 class CommandError(Exception):
78 85 """Exception raised on errors in parsing the command line."""
79 86 __bytes__ = _tobytes
80 87
81 88 class InterventionRequired(Hint, Exception):
82 89 """Exception raised when a command requires human intervention."""
83 90 __bytes__ = _tobytes
84 91
85 92 class Abort(Hint, Exception):
86 93 """Raised if a command needs to print an error and exit."""
87 94 __bytes__ = _tobytes
88 95
89 96 class HookLoadError(Abort):
90 97 """raised when loading a hook fails, aborting an operation
91 98
92 99 Exists to allow more specialized catching."""
93 100
94 101 class HookAbort(Abort):
95 102 """raised when a validation hook fails, aborting an operation
96 103
97 104 Exists to allow more specialized catching."""
98 105
99 106 class ConfigError(Abort):
100 107 """Exception raised when parsing config files"""
101 108
102 109 class UpdateAbort(Abort):
103 110 """Raised when an update is aborted for destination issues"""
104 111
105 112 class MergeDestAbort(Abort):
106 113 """Raised when an update is aborted for destination issues"""
107 114
108 115 class NoMergeDestAbort(MergeDestAbort):
109 116 """Raised when an update is aborted because there is nothing to merge"""
110 117
111 118 class ManyMergeDestAbort(MergeDestAbort):
112 119 """Raised when an update is aborted because destination is ambiguous"""
113 120
114 121 class ResponseExpected(Abort):
115 122 """Raised when an EOF is received for a prompt"""
116 123 def __init__(self):
117 124 from .i18n import _
118 125 Abort.__init__(self, _('response expected'))
119 126
120 127 class OutOfBandError(Hint, Exception):
121 128 """Exception raised when a remote repo reports failure"""
122 129 __bytes__ = _tobytes
123 130
124 131 class ParseError(Hint, Exception):
125 132 """Raised when parsing config files and {rev,file}sets (msg[, pos])"""
126 133 __bytes__ = _tobytes
127 134
128 135 class PatchError(Exception):
129 136 __bytes__ = _tobytes
130 137
131 138 class UnknownIdentifier(ParseError):
132 139 """Exception raised when a {rev,file}set references an unknown identifier"""
133 140
134 141 def __init__(self, function, symbols):
135 142 from .i18n import _
136 143 ParseError.__init__(self, _("unknown identifier: %s") % function)
137 144 self.function = function
138 145 self.symbols = symbols
139 146
140 147 class RepoError(Hint, Exception):
141 148 __bytes__ = _tobytes
142 149
143 150 class RepoLookupError(RepoError):
144 151 pass
145 152
146 153 class FilteredRepoLookupError(RepoLookupError):
147 154 pass
148 155
149 156 class CapabilityError(RepoError):
150 157 pass
151 158
152 159 class RequirementError(RepoError):
153 160 """Exception raised if .hg/requires has an unknown entry."""
154 161
155 162 class StdioError(IOError):
156 163 """Raised if I/O to stdout or stderr fails"""
157 164
158 165 def __init__(self, err):
159 166 IOError.__init__(self, err.errno, err.strerror)
160 167
161 168 # no __bytes__() because error message is derived from the standard IOError
162 169
163 170 class UnsupportedMergeRecords(Abort):
164 171 def __init__(self, recordtypes):
165 172 from .i18n import _
166 173 self.recordtypes = sorted(recordtypes)
167 174 s = ' '.join(self.recordtypes)
168 175 Abort.__init__(
169 176 self, _('unsupported merge state records: %s') % s,
170 177 hint=_('see https://mercurial-scm.org/wiki/MergeStateRecords for '
171 178 'more information'))
172 179
173 180 class UnknownVersion(Abort):
174 181 """generic exception for aborting from an encounter with an unknown version
175 182 """
176 183
177 184 def __init__(self, msg, hint=None, version=None):
178 185 self.version = version
179 186 super(UnknownVersion, self).__init__(msg, hint=hint)
180 187
181 188 class LockError(IOError):
182 189 def __init__(self, errno, strerror, filename, desc):
183 190 IOError.__init__(self, errno, strerror, filename)
184 191 self.desc = desc
185 192
186 193 # no __bytes__() because error message is derived from the standard IOError
187 194
188 195 class LockHeld(LockError):
189 196 def __init__(self, errno, filename, desc, locker):
190 197 LockError.__init__(self, errno, 'Lock held', filename, desc)
191 198 self.locker = locker
192 199
193 200 class LockUnavailable(LockError):
194 201 pass
195 202
196 203 # LockError is for errors while acquiring the lock -- this is unrelated
197 204 class LockInheritanceContractViolation(RuntimeError):
198 205 __bytes__ = _tobytes
199 206
200 207 class ResponseError(Exception):
201 208 """Raised to print an error with part of output and exit."""
202 209 __bytes__ = _tobytes
203 210
204 211 class UnknownCommand(Exception):
205 212 """Exception raised if command is not in the command table."""
206 213 __bytes__ = _tobytes
207 214
208 215 class AmbiguousCommand(Exception):
209 216 """Exception raised if command shortcut matches more than one command."""
210 217 __bytes__ = _tobytes
211 218
212 219 # derived from KeyboardInterrupt to simplify some breakout code
213 220 class SignalInterrupt(KeyboardInterrupt):
214 221 """Exception raised on SIGTERM and SIGHUP."""
215 222
216 223 class SignatureError(Exception):
217 224 __bytes__ = _tobytes
218 225
219 226 class PushRaced(RuntimeError):
220 227 """An exception raised during unbundling that indicates a push race"""
221 228 __bytes__ = _tobytes
222 229
223 230 class ProgrammingError(Hint, RuntimeError):
224 231 """Raised if a mercurial (core or extension) developer made a mistake"""
225 232
226 233 def __init__(self, msg, *args, **kwargs):
227 234 # On Python 3, turn the message back into a string since this is
228 235 # an internal-only error that won't be printed except in a
229 236 # stack trace.
230 237 msg = pycompat.sysstr(msg)
231 238 super(ProgrammingError, self).__init__(msg, *args, **kwargs)
232 239
233 240 __bytes__ = _tobytes
234 241
235 242 class WdirUnsupported(Exception):
236 243 """An exception which is raised when 'wdir()' is not supported"""
237 244 __bytes__ = _tobytes
238 245
239 246 # bundle2 related errors
240 247 class BundleValueError(ValueError):
241 248 """error raised when bundle2 cannot be processed"""
242 249 __bytes__ = _tobytes
243 250
244 251 class BundleUnknownFeatureError(BundleValueError):
245 252 def __init__(self, parttype=None, params=(), values=()):
246 253 self.parttype = parttype
247 254 self.params = params
248 255 self.values = values
249 256 if self.parttype is None:
250 257 msg = 'Stream Parameter'
251 258 else:
252 259 msg = parttype
253 260 entries = self.params
254 261 if self.params and self.values:
255 262 assert len(self.params) == len(self.values)
256 263 entries = []
257 264 for idx, par in enumerate(self.params):
258 265 val = self.values[idx]
259 266 if val is None:
260 267 entries.append(par)
261 268 else:
262 269 entries.append("%s=%r" % (par, pycompat.maybebytestr(val)))
263 270 if entries:
264 271 msg = '%s - %s' % (msg, ', '.join(entries))
265 272 ValueError.__init__(self, msg)
266 273
267 274 class ReadOnlyPartError(RuntimeError):
268 275 """error raised when code tries to alter a part being generated"""
269 276 __bytes__ = _tobytes
270 277
271 278 class PushkeyFailed(Abort):
272 279 """error raised when a pushkey part failed to update a value"""
273 280
274 281 def __init__(self, partid, namespace=None, key=None, new=None, old=None,
275 282 ret=None):
276 283 self.partid = partid
277 284 self.namespace = namespace
278 285 self.key = key
279 286 self.new = new
280 287 self.old = old
281 288 self.ret = ret
282 289 # no i18n; this is expected to be processed into a better message
283 290 Abort.__init__(self, 'failed to update value for "%s/%s"'
284 291 % (namespace, key))
285 292
286 293 class CensoredNodeError(StorageError):
287 294 """error raised when content verification fails on a censored node
288 295
289 296 Also contains the tombstone data substituted for the uncensored data.
290 297 """
291 298
292 299 def __init__(self, filename, node, tombstone):
293 300 from .node import short
294 301 StorageError.__init__(self, '%s:%s' % (filename, short(node)))
295 302 self.tombstone = tombstone
296 303
297 304 class CensoredBaseError(StorageError):
298 305 """error raised when a delta is rejected because its base is censored
299 306
300 307 A delta based on a censored revision must be formed as single patch
301 308 operation which replaces the entire base with new content. This ensures
302 309 the delta may be applied by clones which have not censored the base.
303 310 """
304 311
305 312 class InvalidBundleSpecification(Exception):
306 313 """error raised when a bundle specification is invalid.
307 314
308 315 This is used for syntax errors as opposed to support errors.
309 316 """
310 317 __bytes__ = _tobytes
311 318
312 319 class UnsupportedBundleSpecification(Exception):
313 320 """error raised when a bundle specification is not supported."""
314 321 __bytes__ = _tobytes
315 322
316 323 class CorruptedState(Exception):
317 324 """error raised when a command is not able to read its state from file"""
318 325 __bytes__ = _tobytes
319 326
320 327 class PeerTransportError(Abort):
321 328 """Transport-level I/O error when communicating with a peer repo."""
322 329
323 330 class InMemoryMergeConflictsError(Exception):
324 331 """Exception raised when merge conflicts arose during an in-memory merge."""
325 332 __bytes__ = _tobytes
326 333
327 334 class WireprotoCommandError(Exception):
328 335 """Represents an error during execution of a wire protocol command.
329 336
330 337 Should only be thrown by wire protocol version 2 commands.
331 338
332 339 The error is a formatter string and an optional iterable of arguments.
333 340 """
334 341 def __init__(self, message, args=None):
335 342 self.message = message
336 343 self.messageargs = args
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,2665 +1,2671 @@
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .revlogutils.constants import (
39 39 FLAG_GENERALDELTA,
40 40 FLAG_INLINE_DATA,
41 41 REVLOGV0,
42 42 REVLOGV1,
43 43 REVLOGV1_FLAGS,
44 44 REVLOGV2,
45 45 REVLOGV2_FLAGS,
46 46 REVLOG_DEFAULT_FLAGS,
47 47 REVLOG_DEFAULT_FORMAT,
48 48 REVLOG_DEFAULT_VERSION,
49 49 )
50 50 from .revlogutils.flagutil import (
51 51 REVIDX_DEFAULT_FLAGS,
52 52 REVIDX_ELLIPSIS,
53 53 REVIDX_EXTSTORED,
54 54 REVIDX_FLAGS_ORDER,
55 55 REVIDX_ISCENSORED,
56 56 REVIDX_RAWTEXT_CHANGING_FLAGS,
57 57 )
58 58 from .thirdparty import (
59 59 attr,
60 60 )
61 61 from . import (
62 62 ancestor,
63 63 dagop,
64 64 error,
65 65 mdiff,
66 66 policy,
67 67 pycompat,
68 68 templatefilters,
69 69 util,
70 70 )
71 71 from .interfaces import (
72 72 repository,
73 73 util as interfaceutil,
74 74 )
75 75 from .revlogutils import (
76 76 deltas as deltautil,
77 77 flagutil,
78 78 )
79 79 from .utils import (
80 80 storageutil,
81 81 stringutil,
82 82 )
83 83
84 84 # bare usage of all the names to silence pyflakes unused-import warnings
85 85 # We need these names available in the module for extensions.
86 86 REVLOGV0
87 87 REVLOGV1
88 88 REVLOGV2
89 89 FLAG_INLINE_DATA
90 90 FLAG_GENERALDELTA
91 91 REVLOG_DEFAULT_FLAGS
92 92 REVLOG_DEFAULT_FORMAT
93 93 REVLOG_DEFAULT_VERSION
94 94 REVLOGV1_FLAGS
95 95 REVLOGV2_FLAGS
96 96 REVIDX_ISCENSORED
97 97 REVIDX_ELLIPSIS
98 98 REVIDX_EXTSTORED
99 99 REVIDX_DEFAULT_FLAGS
100 100 REVIDX_FLAGS_ORDER
101 101 REVIDX_RAWTEXT_CHANGING_FLAGS
102 102
103 103 parsers = policy.importmod(r'parsers')
104 104 rustancestor = policy.importrust(r'ancestor')
105 105 rustdagop = policy.importrust(r'dagop')
106 106
107 107 # Aliased for performance.
108 108 _zlibdecompress = zlib.decompress
109 109
110 110 # max size of revlog with inline data
111 111 _maxinline = 131072
112 112 _chunksize = 1048576
113 113
114 114 # Flag processors for REVIDX_ELLIPSIS.
115 115 def ellipsisreadprocessor(rl, text):
116 116 return text, False, {}
117 117
118 118 def ellipsiswriteprocessor(rl, text, sidedata):
119 119 return text, False
120 120
121 121 def ellipsisrawprocessor(rl, text):
122 122 return False
123 123
124 124 ellipsisprocessor = (
125 125 ellipsisreadprocessor,
126 126 ellipsiswriteprocessor,
127 127 ellipsisrawprocessor,
128 128 )
129 129
130 130 def getoffset(q):
131 131 return int(q >> 16)
132 132
133 133 def gettype(q):
134 134 return int(q & 0xFFFF)
135 135
136 136 def offset_type(offset, type):
137 137 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
138 138 raise ValueError('unknown revlog index flags')
139 139 return int(int(offset) << 16 | type)
140 140
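Illustration (not part of the diff): the byte offset occupies the high bits of a single integer and the 16 flag bits the low bits.

# Editorial sketch: pack and unpack an (offset, flags) pair.
packed = offset_type(1048576, 0)   # flags must be in REVIDX_KNOWN_FLAGS
assert getoffset(packed) == 1048576
assert gettype(packed) == 0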
141 141 @attr.s(slots=True, frozen=True)
142 142 class _revisioninfo(object):
143 143 """Information about a revision that allows building its fulltext
144 144 node: expected hash of the revision
145 145 p1, p2: parent revs of the revision
146 146 btext: built text cache consisting of a one-element list
147 147 cachedelta: (baserev, uncompressed_delta) or None
148 148 flags: flags associated to the revision storage
149 149
150 150 One of btext[0] or cachedelta must be set.
151 151 """
152 152 node = attr.ib()
153 153 p1 = attr.ib()
154 154 p2 = attr.ib()
155 155 btext = attr.ib()
156 156 textlen = attr.ib()
157 157 cachedelta = attr.ib()
158 158 flags = attr.ib()
159 159
160 160 @interfaceutil.implementer(repository.irevisiondelta)
161 161 @attr.s(slots=True)
162 162 class revlogrevisiondelta(object):
163 163 node = attr.ib()
164 164 p1node = attr.ib()
165 165 p2node = attr.ib()
166 166 basenode = attr.ib()
167 167 flags = attr.ib()
168 168 baserevisionsize = attr.ib()
169 169 revision = attr.ib()
170 170 delta = attr.ib()
171 171 linknode = attr.ib(default=None)
172 172
173 173 @interfaceutil.implementer(repository.iverifyproblem)
174 174 @attr.s(frozen=True)
175 175 class revlogproblem(object):
176 176 warning = attr.ib(default=None)
177 177 error = attr.ib(default=None)
178 178 node = attr.ib(default=None)
179 179
180 180 # index v0:
181 181 # 4 bytes: offset
182 182 # 4 bytes: compressed length
183 183 # 4 bytes: base rev
184 184 # 4 bytes: link rev
185 185 # 20 bytes: parent 1 nodeid
186 186 # 20 bytes: parent 2 nodeid
187 187 # 20 bytes: nodeid
188 188 indexformatv0 = struct.Struct(">4l20s20s20s")
189 189 indexformatv0_pack = indexformatv0.pack
190 190 indexformatv0_unpack = indexformatv0.unpack
191 191
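Illustration (not part of the diff): the v0 record layout above adds up to a fixed 76-byte entry.

# Editorial sketch: four 4-byte ints plus three 20-byte nodeids.
assert indexformatv0.size == 4 * 4 + 3 * 20 == 76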
192 192 class revlogoldindex(list):
193 193 def __getitem__(self, i):
194 194 if i == -1:
195 195 return (0, 0, 0, -1, -1, -1, -1, nullid)
196 196 return list.__getitem__(self, i)
197 197
198 198 class revlogoldio(object):
199 199 def __init__(self):
200 200 self.size = indexformatv0.size
201 201
202 202 def parseindex(self, data, inline):
203 203 s = self.size
204 204 index = []
205 205 nodemap = {nullid: nullrev}
206 206 n = off = 0
207 207 l = len(data)
208 208 while off + s <= l:
209 209 cur = data[off:off + s]
210 210 off += s
211 211 e = indexformatv0_unpack(cur)
212 212 # transform to revlogv1 format
213 213 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
214 214 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
215 215 index.append(e2)
216 216 nodemap[e[6]] = n
217 217 n += 1
218 218
219 219 return revlogoldindex(index), nodemap, None
220 220
221 221 def packentry(self, entry, node, version, rev):
222 222 if gettype(entry[0]):
223 223 raise error.RevlogError(_('index entry flags need revlog '
224 224 'version 1'))
225 225 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
226 226 node(entry[5]), node(entry[6]), entry[7])
227 227 return indexformatv0_pack(*e2)
228 228
229 229 # index ng:
230 230 # 6 bytes: offset
231 231 # 2 bytes: flags
232 232 # 4 bytes: compressed length
233 233 # 4 bytes: uncompressed length
234 234 # 4 bytes: base rev
235 235 # 4 bytes: link rev
236 236 # 4 bytes: parent 1 rev
237 237 # 4 bytes: parent 2 rev
238 238 # 32 bytes: nodeid
239 239 indexformatng = struct.Struct(">Qiiiiii20s12x")
240 240 indexformatng_pack = indexformatng.pack
241 241 versionformat = struct.Struct(">I")
242 242 versionformat_pack = versionformat.pack
243 243 versionformat_unpack = versionformat.unpack
244 244
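Illustration (not part of the diff): the "ng" layout above is a fixed 64-byte entry; as `revlogio.packentry` below shows, the first four bytes of entry 0 are overwritten with the version header.

# Editorial sketch: 8 (offset+flags) + 6*4 (ints) + 20 (node) + 12 (pad).
assert indexformatng.size == 8 + 6 * 4 + 20 + 12 == 64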
245 245 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
246 246 # signed integer)
247 247 _maxentrysize = 0x7fffffff
248 248
249 249 class revlogio(object):
250 250 def __init__(self):
251 251 self.size = indexformatng.size
252 252
253 253 def parseindex(self, data, inline):
254 254 # call the C implementation to parse the index data
255 255 index, cache = parsers.parse_index2(data, inline)
256 256 return index, getattr(index, 'nodemap', None), cache
257 257
258 258 def packentry(self, entry, node, version, rev):
259 259 p = indexformatng_pack(*entry)
260 260 if rev == 0:
261 261 p = versionformat_pack(version) + p[4:]
262 262 return p
263 263
264 264 class revlog(object):
265 265 """
266 266 the underlying revision storage object
267 267
268 268 A revlog consists of two parts, an index and the revision data.
269 269
270 270 The index is a file with a fixed record size containing
271 271 information on each revision, including its nodeid (hash), the
272 272 nodeids of its parents, the position and offset of its data within
273 273 the data file, and the revision it's based on. Finally, each entry
274 274 contains a linkrev entry that can serve as a pointer to external
275 275 data.
276 276
277 277 The revision data itself is a linear collection of data chunks.
278 278 Each chunk represents a revision and is usually represented as a
279 279 delta against the previous chunk. To bound lookup time, runs of
280 280 deltas are limited to about 2 times the length of the original
281 281 version data. This makes retrieval of a version proportional to
282 282 its size, or O(1) relative to the number of revisions.
283 283
284 284 Both pieces of the revlog are written to in an append-only
285 285 fashion, which means we never need to rewrite a file to insert or
286 286 remove data, and can use some simple techniques to avoid the need
287 287 for locking while reading.
288 288
289 289 If checkambig, indexfile is opened with checkambig=True at
290 290 writing, to avoid file stat ambiguity.
291 291
292 292 If mmaplargeindex is True, and an mmapindexthreshold is set, the
293 293 index will be mmapped rather than read if it is larger than the
294 294 configured threshold.
295 295
296 296 If censorable is True, the revlog can have censored revisions.
297 297
298 298 If `upperboundcomp` is not None, this is the expected maximal gain from
299 299 compression for the data content.
300 300 """
301 301
302 302 _flagserrorclass = error.RevlogError
303 303
304 304 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
305 305 mmaplargeindex=False, censorable=False,
306 306 upperboundcomp=None):
307 307 """
308 308 create a revlog object
309 309
310 310 opener is a function that abstracts the file opening operation
311 311 and can be used to implement COW semantics or the like.
312 312
313 313 """
314 314 self.upperboundcomp = upperboundcomp
315 315 self.indexfile = indexfile
316 316 self.datafile = datafile or (indexfile[:-2] + ".d")
317 317 self.opener = opener
318 318 # When True, indexfile is opened with checkambig=True at writing, to
319 319 # avoid file stat ambiguity.
320 320 self._checkambig = checkambig
321 321 self._mmaplargeindex = mmaplargeindex
322 322 self._censorable = censorable
323 323 # 3-tuple of (node, rev, text) for a raw revision.
324 324 self._revisioncache = None
325 325 # Maps rev to chain base rev.
326 326 self._chainbasecache = util.lrucachedict(100)
327 327 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
328 328 self._chunkcache = (0, '')
329 329 # How much data to read and cache into the raw revlog data cache.
330 330 self._chunkcachesize = 65536
331 331 self._maxchainlen = None
332 332 self._deltabothparents = True
333 333 self.index = []
334 334 # Mapping of partial identifiers to full nodes.
335 335 self._pcache = {}
336 336 # Mapping of revision integer to full node.
337 337 self._nodecache = {nullid: nullrev}
338 338 self._nodepos = None
339 339 self._compengine = 'zlib'
340 340 self._compengineopts = {}
341 341 self._maxdeltachainspan = -1
342 342 self._withsparseread = False
343 343 self._sparserevlog = False
344 344 self._srdensitythreshold = 0.50
345 345 self._srmingapsize = 262144
346 346
347 347 # Make copy of flag processors so each revlog instance can support
348 348 # custom flags.
349 349 self._flagprocessors = dict(flagutil.flagprocessors)
350 350
351 351 # 2-tuple of file handles being used for active writing.
352 352 self._writinghandles = None
353 353
354 354 self._loadindex()
355 355
356 356 def _loadindex(self):
357 357 mmapindexthreshold = None
358 358 opts = self.opener.options
359 359
360 360 if 'revlogv2' in opts:
361 361 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
362 362 elif 'revlogv1' in opts:
363 363 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
364 364 if 'generaldelta' in opts:
365 365 newversionflags |= FLAG_GENERALDELTA
366 366 elif 'revlogv0' in self.opener.options:
367 367 newversionflags = REVLOGV0
368 368 else:
369 369 newversionflags = REVLOG_DEFAULT_VERSION
370 370
371 371 if 'chunkcachesize' in opts:
372 372 self._chunkcachesize = opts['chunkcachesize']
373 373 if 'maxchainlen' in opts:
374 374 self._maxchainlen = opts['maxchainlen']
375 375 if 'deltabothparents' in opts:
376 376 self._deltabothparents = opts['deltabothparents']
377 377 self._lazydelta = bool(opts.get('lazydelta', True))
378 378 self._lazydeltabase = False
379 379 if self._lazydelta:
380 380 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
381 381 if 'compengine' in opts:
382 382 self._compengine = opts['compengine']
383 383 if 'zlib.level' in opts:
384 384 self._compengineopts['zlib.level'] = opts['zlib.level']
385 385 if 'zstd.level' in opts:
386 386 self._compengineopts['zstd.level'] = opts['zstd.level']
387 387 if 'maxdeltachainspan' in opts:
388 388 self._maxdeltachainspan = opts['maxdeltachainspan']
389 389 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
390 390 mmapindexthreshold = opts['mmapindexthreshold']
391 391 self.hassidedata = bool(opts.get('side-data', False))
392 392 self._sparserevlog = bool(opts.get('sparse-revlog', False))
393 393 withsparseread = bool(opts.get('with-sparse-read', False))
394 394 # sparse-revlog forces sparse-read
395 395 self._withsparseread = self._sparserevlog or withsparseread
396 396 if 'sparse-read-density-threshold' in opts:
397 397 self._srdensitythreshold = opts['sparse-read-density-threshold']
398 398 if 'sparse-read-min-gap-size' in opts:
399 399 self._srmingapsize = opts['sparse-read-min-gap-size']
400 400 if opts.get('enableellipsis'):
401 401 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
402 402
403 403 # revlog v0 doesn't have flag processors
404 404 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
405 405 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
406 406
407 407 if self._chunkcachesize <= 0:
408 408 raise error.RevlogError(_('revlog chunk cache size %r is not '
409 409 'greater than 0') % self._chunkcachesize)
410 410 elif self._chunkcachesize & (self._chunkcachesize - 1):
411 411 raise error.RevlogError(_('revlog chunk cache size %r is not a '
412 412 'power of 2') % self._chunkcachesize)
413 413
414 414 indexdata = ''
415 415 self._initempty = True
416 416 try:
417 417 with self._indexfp() as f:
418 418 if (mmapindexthreshold is not None and
419 419 self.opener.fstat(f).st_size >= mmapindexthreshold):
420 420 # TODO: should .close() to release resources without
421 421 # relying on Python GC
422 422 indexdata = util.buffer(util.mmapread(f))
423 423 else:
424 424 indexdata = f.read()
425 425 if len(indexdata) > 0:
426 426 versionflags = versionformat_unpack(indexdata[:4])[0]
427 427 self._initempty = False
428 428 else:
429 429 versionflags = newversionflags
430 430 except IOError as inst:
431 431 if inst.errno != errno.ENOENT:
432 432 raise
433 433
434 434 versionflags = newversionflags
435 435
436 436 self.version = versionflags
437 437
438 438 flags = versionflags & ~0xFFFF
439 439 fmt = versionflags & 0xFFFF
440 440
441 441 if fmt == REVLOGV0:
442 442 if flags:
443 443 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
444 444 'revlog %s') %
445 445 (flags >> 16, fmt, self.indexfile))
446 446
447 447 self._inline = False
448 448 self._generaldelta = False
449 449
450 450 elif fmt == REVLOGV1:
451 451 if flags & ~REVLOGV1_FLAGS:
452 452 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
453 453 'revlog %s') %
454 454 (flags >> 16, fmt, self.indexfile))
455 455
456 456 self._inline = versionflags & FLAG_INLINE_DATA
457 457 self._generaldelta = versionflags & FLAG_GENERALDELTA
458 458
459 459 elif fmt == REVLOGV2:
460 460 if flags & ~REVLOGV2_FLAGS:
461 461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 462 'revlog %s') %
463 463 (flags >> 16, fmt, self.indexfile))
464 464
465 465 self._inline = versionflags & FLAG_INLINE_DATA
466 466 # generaldelta implied by version 2 revlogs.
467 467 self._generaldelta = True
468 468
469 469 else:
470 470 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
471 471 (fmt, self.indexfile))
472 472 # sparse-revlog can't be on without general-delta (issue6056)
473 473 if not self._generaldelta:
474 474 self._sparserevlog = False
475 475
476 476 self._storedeltachains = True
477 477
478 478 self._io = revlogio()
479 479 if self.version == REVLOGV0:
480 480 self._io = revlogoldio()
481 481 try:
482 482 d = self._io.parseindex(indexdata, self._inline)
483 483 except (ValueError, IndexError):
484 484 raise error.RevlogError(_("index %s is corrupted") %
485 485 self.indexfile)
486 486 self.index, nodemap, self._chunkcache = d
487 487 if nodemap is not None:
488 488 self.nodemap = self._nodecache = nodemap
489 489 if not self._chunkcache:
490 490 self._chunkclear()
491 491 # revnum -> (chain-length, sum-delta-length)
492 492 self._chaininfocache = {}
493 493 # revlog header -> revlog compressor
494 494 self._decompressors = {}
495 495
496 496 @util.propertycache
497 497 def _compressor(self):
498 498 engine = util.compengines[self._compengine]
499 499 return engine.revlogcompressor(self._compengineopts)
500 500
501 501 def _indexfp(self, mode='r'):
502 502 """file object for the revlog's index file"""
503 503 args = {r'mode': mode}
504 504 if mode != 'r':
505 505 args[r'checkambig'] = self._checkambig
506 506 if mode == 'w':
507 507 args[r'atomictemp'] = True
508 508 return self.opener(self.indexfile, **args)
509 509
510 510 def _datafp(self, mode='r'):
511 511 """file object for the revlog's data file"""
512 512 return self.opener(self.datafile, mode=mode)
513 513
514 514 @contextlib.contextmanager
515 515 def _datareadfp(self, existingfp=None):
516 516 """file object suitable to read data"""
517 517 # Use explicit file handle, if given.
518 518 if existingfp is not None:
519 519 yield existingfp
520 520
521 521 # Use a file handle being actively used for writes, if available.
522 522 # There is some danger to doing this because reads will seek the
523 523 # file. However, _writeentry() performs a SEEK_END before all writes,
524 524 # so we should be safe.
525 525 elif self._writinghandles:
526 526 if self._inline:
527 527 yield self._writinghandles[0]
528 528 else:
529 529 yield self._writinghandles[1]
530 530
531 531 # Otherwise open a new file handle.
532 532 else:
533 533 if self._inline:
534 534 func = self._indexfp
535 535 else:
536 536 func = self._datafp
537 537 with func() as fp:
538 538 yield fp
539 539
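Illustration (not part of the diff): a hypothetical caller reading a raw byte span through the context manager, so that an active write handle is reused when one exists.

# Editorial sketch (hypothetical helper; 'rl' is a revlog instance):
def _readspan(rl, start, length):
    with rl._datareadfp() as fp:
        fp.seek(start)
        return fp.read(length)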
540 540 def tip(self):
541 541 return self.node(len(self.index) - 1)
542 542 def __contains__(self, rev):
543 543 return 0 <= rev < len(self)
544 544 def __len__(self):
545 545 return len(self.index)
546 546 def __iter__(self):
547 547 return iter(pycompat.xrange(len(self)))
548 548 def revs(self, start=0, stop=None):
549 549 """iterate over all revs in this revlog (from start to stop)"""
550 550 return storageutil.iterrevs(len(self), start=start, stop=stop)
551 551
552 552 @util.propertycache
553 553 def nodemap(self):
554 554 if self.index:
555 555 # populate mapping down to the initial node
556 556 node0 = self.index[0][7] # get around changelog filtering
557 557 self.rev(node0)
558 558 return self._nodecache
559 559
560 560 def hasnode(self, node):
561 561 try:
562 562 self.rev(node)
563 563 return True
564 564 except KeyError:
565 565 return False
566 566
567 567 def candelta(self, baserev, rev):
568 568 """whether two revisions (baserev, rev) can be delta-ed or not"""
569 569 # Disable delta if either rev requires a content-changing flag
570 570 # processor (ex. LFS). This is because such flag processor can alter
571 571 # the rawtext content that the delta will be based on, and two clients
572 572 # could have a same revlog node with different flags (i.e. different
573 573 # rawtext contents) and the delta could be incompatible.
574 574 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
575 575 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
576 576 return False
577 577 return True
578 578
579 579 def clearcaches(self):
580 580 self._revisioncache = None
581 581 self._chainbasecache.clear()
582 582 self._chunkcache = (0, '')
583 583 self._pcache = {}
584 584
585 585 try:
586 586 # If we are using the native C version, you are in a fun case
587 587 # where self.index, self.nodemap and self._nodecache are the same
588 588 # object.
589 589 self._nodecache.clearcaches()
590 590 except AttributeError:
591 591 self._nodecache = {nullid: nullrev}
592 592 self._nodepos = None
593 593
594 594 def rev(self, node):
595 595 try:
596 596 return self._nodecache[node]
597 597 except TypeError:
598 598 raise
599 599 except error.RevlogError:
600 600 # parsers.c radix tree lookup failed
601 601 if node == wdirid or node in wdirfilenodeids:
602 602 raise error.WdirUnsupported
603 603 raise error.LookupError(node, self.indexfile, _('no node'))
604 604 except KeyError:
605 605 # pure python cache lookup failed
606 606 n = self._nodecache
607 607 i = self.index
608 608 p = self._nodepos
609 609 if p is None:
610 610 p = len(i) - 1
611 611 else:
612 612 assert p < len(i)
613 613 for r in pycompat.xrange(p, -1, -1):
614 614 v = i[r][7]
615 615 n[v] = r
616 616 if v == node:
617 617 self._nodepos = r - 1
618 618 return r
619 619 if node == wdirid or node in wdirfilenodeids:
620 620 raise error.WdirUnsupported
621 621 raise error.LookupError(node, self.indexfile, _('no node'))
622 622
623 623 # Accessors for index entries.
624 624
625 625 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
626 626 # are flags.
627 627 def start(self, rev):
628 628 return int(self.index[rev][0] >> 16)
629 629
630 630 def flags(self, rev):
631 631 return self.index[rev][0] & 0xFFFF
632 632
633 633 def length(self, rev):
634 634 return self.index[rev][1]
635 635
636 636 def rawsize(self, rev):
637 637 """return the length of the uncompressed text for a given revision"""
638 638 l = self.index[rev][2]
639 639 if l >= 0:
640 640 return l
641 641
642 642 t = self.rawdata(rev)
643 643 return len(t)
644 644
645 645 def size(self, rev):
646 646 """length of non-raw text (processed by a "read" flag processor)"""
647 647 # fast path: if no "read" flag processor could change the content,
648 648 # size is rawsize. note: ELLIPSIS is known to not change the content.
649 649 flags = self.flags(rev)
650 650 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
651 651 return self.rawsize(rev)
652 652
653 653 return len(self.revision(rev, raw=False))
654 654
655 655 def chainbase(self, rev):
656 656 base = self._chainbasecache.get(rev)
657 657 if base is not None:
658 658 return base
659 659
660 660 index = self.index
661 661 iterrev = rev
662 662 base = index[iterrev][3]
663 663 while base != iterrev:
664 664 iterrev = base
665 665 base = index[iterrev][3]
666 666
667 667 self._chainbasecache[rev] = base
668 668 return base
669 669
670 670 def linkrev(self, rev):
671 671 return self.index[rev][4]
672 672
673 673 def parentrevs(self, rev):
674 674 try:
675 675 entry = self.index[rev]
676 676 except IndexError:
677 677 if rev == wdirrev:
678 678 raise error.WdirUnsupported
679 679 raise
680 680
681 681 return entry[5], entry[6]
682 682
683 683 # fast parentrevs(rev) where rev isn't filtered
684 684 _uncheckedparentrevs = parentrevs
685 685
686 686 def node(self, rev):
687 687 try:
688 688 return self.index[rev][7]
689 689 except IndexError:
690 690 if rev == wdirrev:
691 691 raise error.WdirUnsupported
692 692 raise
693 693
694 694 # Derived from index values.
695 695
696 696 def end(self, rev):
697 697 return self.start(rev) + self.length(rev)
698 698
699 699 def parents(self, node):
700 700 i = self.index
701 701 d = i[self.rev(node)]
702 702 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
703 703
704 704 def chainlen(self, rev):
705 705 return self._chaininfo(rev)[0]
706 706
707 707 def _chaininfo(self, rev):
708 708 chaininfocache = self._chaininfocache
709 709 if rev in chaininfocache:
710 710 return chaininfocache[rev]
711 711 index = self.index
712 712 generaldelta = self._generaldelta
713 713 iterrev = rev
714 714 e = index[iterrev]
715 715 clen = 0
716 716 compresseddeltalen = 0
717 717 while iterrev != e[3]:
718 718 clen += 1
719 719 compresseddeltalen += e[1]
720 720 if generaldelta:
721 721 iterrev = e[3]
722 722 else:
723 723 iterrev -= 1
724 724 if iterrev in chaininfocache:
725 725 t = chaininfocache[iterrev]
726 726 clen += t[0]
727 727 compresseddeltalen += t[1]
728 728 break
729 729 e = index[iterrev]
730 730 else:
731 731 # Add text length of base since decompressing that also takes
732 732 # work. For cache hits the length is already included.
733 733 compresseddeltalen += e[1]
734 734 r = (clen, compresseddeltalen)
735 735 chaininfocache[rev] = r
736 736 return r
737 737
738 738 def _deltachain(self, rev, stoprev=None):
739 739 """Obtain the delta chain for a revision.
740 740
741 741 ``stoprev`` specifies a revision to stop at. If not specified, we
742 742 stop at the base of the chain.
743 743
744 744 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
745 745 revs in ascending order and ``stopped`` is a bool indicating whether
746 746 ``stoprev`` was hit.
747 747 """
748 748 # Try C implementation.
749 749 try:
750 750 return self.index.deltachain(rev, stoprev, self._generaldelta)
751 751 except AttributeError:
752 752 pass
753 753
754 754 chain = []
755 755
756 756 # Alias to prevent attribute lookup in tight loop.
757 757 index = self.index
758 758 generaldelta = self._generaldelta
759 759
760 760 iterrev = rev
761 761 e = index[iterrev]
762 762 while iterrev != e[3] and iterrev != stoprev:
763 763 chain.append(iterrev)
764 764 if generaldelta:
765 765 iterrev = e[3]
766 766 else:
767 767 iterrev -= 1
768 768 e = index[iterrev]
769 769
770 770 if iterrev == stoprev:
771 771 stopped = True
772 772 else:
773 773 chain.append(iterrev)
774 774 stopped = False
775 775
776 776 chain.reverse()
777 777 return chain, stopped
778 778
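Illustration (not part of the diff): with no `stoprev`, the returned chain lists, base first, the revisions whose deltas rebuild `rev`, ending at `rev` itself.

# Editorial sketch ('rl' is a revlog instance):
def _chainexample(rl, rev):
    chain, stopped = rl._deltachain(rev)
    assert chain[-1] == rev and not stopped  # no stoprev given
    return chain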
779 779 def ancestors(self, revs, stoprev=0, inclusive=False):
780 780 """Generate the ancestors of 'revs' in reverse revision order.
781 781 Does not generate revs lower than stoprev.
782 782
783 783 See the documentation for ancestor.lazyancestors for more details."""
784 784
785 785 # first, make sure start revisions aren't filtered
786 786 revs = list(revs)
787 787 checkrev = self.node
788 788 for r in revs:
789 789 checkrev(r)
790 790 # and we're sure ancestors aren't filtered as well
791 791
792 792 if rustancestor is not None:
793 793 lazyancestors = rustancestor.LazyAncestors
794 794 arg = self.index
795 795 elif util.safehasattr(parsers, 'rustlazyancestors'):
796 796 lazyancestors = ancestor.rustlazyancestors
797 797 arg = self.index
798 798 else:
799 799 lazyancestors = ancestor.lazyancestors
800 800 arg = self._uncheckedparentrevs
801 801 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
802 802
803 803 def descendants(self, revs):
804 804 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
805 805
806 806 def findcommonmissing(self, common=None, heads=None):
807 807 """Return a tuple of the ancestors of common and the ancestors of heads
808 808 that are not ancestors of common. In revset terminology, we return the
809 809 tuple:
810 810
811 811 ::common, (::heads) - (::common)
812 812
813 813 The list is sorted by revision number, meaning it is
814 814 topologically sorted.
815 815
816 816 'heads' and 'common' are both lists of node IDs. If heads is
817 817 not supplied, uses all of the revlog's heads. If common is not
818 818 supplied, uses nullid."""
819 819 if common is None:
820 820 common = [nullid]
821 821 if heads is None:
822 822 heads = self.heads()
823 823
824 824 common = [self.rev(n) for n in common]
825 825 heads = [self.rev(n) for n in heads]
826 826
827 827 # we want the ancestors, but inclusive
828 828 class lazyset(object):
829 829 def __init__(self, lazyvalues):
830 830 self.addedvalues = set()
831 831 self.lazyvalues = lazyvalues
832 832
833 833 def __contains__(self, value):
834 834 return value in self.addedvalues or value in self.lazyvalues
835 835
836 836 def __iter__(self):
837 837 added = self.addedvalues
838 838 for r in added:
839 839 yield r
840 840 for r in self.lazyvalues:
841 841 if r not in added:
842 842 yield r
843 843
844 844 def add(self, value):
845 845 self.addedvalues.add(value)
846 846
847 847 def update(self, values):
848 848 self.addedvalues.update(values)
849 849
850 850 has = lazyset(self.ancestors(common))
851 851 has.add(nullrev)
852 852 has.update(common)
853 853
854 854 # take all ancestors from heads that aren't in has
855 855 missing = set()
856 856 visit = collections.deque(r for r in heads if r not in has)
857 857 while visit:
858 858 r = visit.popleft()
859 859 if r in missing:
860 860 continue
861 861 else:
862 862 missing.add(r)
863 863 for p in self.parentrevs(r):
864 864 if p not in has:
865 865 visit.append(p)
866 866 missing = list(missing)
867 867 missing.sort()
868 868 return has, [self.node(miss) for miss in missing]
869 869
870 870 def incrementalmissingrevs(self, common=None):
871 871 """Return an object that can be used to incrementally compute the
872 872 revision numbers of the ancestors of arbitrary sets that are not
873 873 ancestors of common. This is an ancestor.incrementalmissingancestors
874 874 object.
875 875
876 876 'common' is a list of revision numbers. If common is not supplied, uses
877 877 nullrev.
878 878 """
879 879 if common is None:
880 880 common = [nullrev]
881 881
882 882 if rustancestor is not None:
883 883 return rustancestor.MissingAncestors(self.index, common)
884 884 return ancestor.incrementalmissingancestors(self.parentrevs, common)
885 885
886 886 def findmissingrevs(self, common=None, heads=None):
887 887 """Return the revision numbers of the ancestors of heads that
888 888 are not ancestors of common.
889 889
890 890 More specifically, return a list of revision numbers corresponding to
891 891 nodes N such that every N satisfies the following constraints:
892 892
893 893 1. N is an ancestor of some node in 'heads'
894 894 2. N is not an ancestor of any node in 'common'
895 895
896 896 The list is sorted by revision number, meaning it is
897 897 topologically sorted.
898 898
899 899 'heads' and 'common' are both lists of revision numbers. If heads is
900 900 not supplied, uses all of the revlog's heads. If common is not
901 901 supplied, uses nullrev."""
902 902 if common is None:
903 903 common = [nullrev]
904 904 if heads is None:
905 905 heads = self.headrevs()
906 906
907 907 inc = self.incrementalmissingrevs(common=common)
908 908 return inc.missingancestors(heads)
909 909
910 910 def findmissing(self, common=None, heads=None):
911 911 """Return the ancestors of heads that are not ancestors of common.
912 912
913 913 More specifically, return a list of nodes N such that every N
914 914 satisfies the following constraints:
915 915
916 916 1. N is an ancestor of some node in 'heads'
917 917 2. N is not an ancestor of any node in 'common'
918 918
919 919 The list is sorted by revision number, meaning it is
920 920 topologically sorted.
921 921
922 922 'heads' and 'common' are both lists of node IDs. If heads is
923 923 not supplied, uses all of the revlog's heads. If common is not
924 924 supplied, uses nullid."""
925 925 if common is None:
926 926 common = [nullid]
927 927 if heads is None:
928 928 heads = self.heads()
929 929
930 930 common = [self.rev(n) for n in common]
931 931 heads = [self.rev(n) for n in heads]
932 932
933 933 inc = self.incrementalmissingrevs(common=common)
934 934 return [self.node(r) for r in inc.missingancestors(heads)]
935 935
936 936 def nodesbetween(self, roots=None, heads=None):
937 937 """Return a topological path from 'roots' to 'heads'.
938 938
939 939 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
940 940 topologically sorted list of all nodes N that satisfy both of
941 941 these constraints:
942 942
943 943 1. N is a descendant of some node in 'roots'
944 944 2. N is an ancestor of some node in 'heads'
945 945
946 946 Every node is considered to be both a descendant and an ancestor
947 947 of itself, so every reachable node in 'roots' and 'heads' will be
948 948 included in 'nodes'.
949 949
950 950 'outroots' is the list of reachable nodes in 'roots', i.e., the
951 951 subset of 'roots' that is returned in 'nodes'. Likewise,
952 952 'outheads' is the subset of 'heads' that is also in 'nodes'.
953 953
954 954 'roots' and 'heads' are both lists of node IDs. If 'roots' is
955 955 unspecified, uses nullid as the only root. If 'heads' is
956 956 unspecified, uses list of all of the revlog's heads."""
957 957 nonodes = ([], [], [])
958 958 if roots is not None:
959 959 roots = list(roots)
960 960 if not roots:
961 961 return nonodes
962 962 lowestrev = min([self.rev(n) for n in roots])
963 963 else:
964 964 roots = [nullid] # Everybody's a descendant of nullid
965 965 lowestrev = nullrev
966 966 if (lowestrev == nullrev) and (heads is None):
967 967 # We want _all_ the nodes!
968 968 return ([self.node(r) for r in self], [nullid], list(self.heads()))
969 969 if heads is None:
970 970 # All nodes are ancestors, so the latest ancestor is the last
971 971 # node.
972 972 highestrev = len(self) - 1
973 973 # Set ancestors to None to signal that every node is an ancestor.
974 974 ancestors = None
975 975 # Set heads to an empty dictionary for later discovery of heads
976 976 heads = {}
977 977 else:
978 978 heads = list(heads)
979 979 if not heads:
980 980 return nonodes
981 981 ancestors = set()
982 982 # Turn heads into a dictionary so we can remove 'fake' heads.
983 983 # Also, later we will be using it to filter out the heads we can't
984 984 # find from roots.
985 985 heads = dict.fromkeys(heads, False)
986 986 # Start at the top and keep marking parents until we're done.
987 987 nodestotag = set(heads)
988 988 # Remember where the top was so we can use it as a limit later.
989 989 highestrev = max([self.rev(n) for n in nodestotag])
990 990 while nodestotag:
991 991 # grab a node to tag
992 992 n = nodestotag.pop()
993 993 # Never tag nullid
994 994 if n == nullid:
995 995 continue
996 996 # A node's revision number represents its place in a
997 997 # topologically sorted list of nodes.
998 998 r = self.rev(n)
999 999 if r >= lowestrev:
1000 1000 if n not in ancestors:
1001 1001 # If we are possibly a descendant of one of the roots
1002 1002 # and we haven't already been marked as an ancestor
1003 1003 ancestors.add(n) # Mark as ancestor
1004 1004 # Add non-nullid parents to list of nodes to tag.
1005 1005 nodestotag.update([p for p in self.parents(n) if
1006 1006 p != nullid])
1007 1007 elif n in heads: # We've seen it before, is it a fake head?
1008 1008 # So it is, real heads should not be the ancestors of
1009 1009 # any other heads.
1010 1010 heads.pop(n)
1011 1011 if not ancestors:
1012 1012 return nonodes
1013 1013 # Now that we have our set of ancestors, we want to remove any
1014 1014 # roots that are not ancestors.
1015 1015
1016 1016 # If one of the roots was nullid, everything is included anyway.
1017 1017 if lowestrev > nullrev:
1018 1018 # But, since we weren't, let's recompute the lowest rev to not
1019 1019 # include roots that aren't ancestors.
1020 1020
1021 1021 # Filter out roots that aren't ancestors of heads
1022 1022 roots = [root for root in roots if root in ancestors]
1023 1023 # Recompute the lowest revision
1024 1024 if roots:
1025 1025 lowestrev = min([self.rev(root) for root in roots])
1026 1026 else:
1027 1027 # No more roots? Return empty list
1028 1028 return nonodes
1029 1029 else:
1030 1030 # We are descending from nullid, and don't need to care about
1031 1031 # any other roots.
1032 1032 lowestrev = nullrev
1033 1033 roots = [nullid]
1034 1034 # Transform our roots list into a set.
1035 1035 descendants = set(roots)
1036 1036 # Also, keep the original roots so we can filter out roots that aren't
1037 1037 # 'real' roots (i.e. are descended from other roots).
1038 1038 roots = descendants.copy()
1039 1039 # Our topologically sorted list of output nodes.
1040 1040 orderedout = []
1041 1041 # Don't start at nullid since we don't want nullid in our output list,
1042 1042 # and if nullid shows up in descendants, empty parents will look like
1043 1043 # they're descendants.
1044 1044 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1045 1045 n = self.node(r)
1046 1046 isdescendant = False
1047 1047 if lowestrev == nullrev: # Everybody is a descendant of nullid
1048 1048 isdescendant = True
1049 1049 elif n in descendants:
1050 1050 # n is already a descendant
1051 1051 isdescendant = True
1052 1052 # This check only needs to be done here because all the roots
1053 1053 # will start being marked as descendants before the loop.
1054 1054 if n in roots:
1055 1055 # If n was a root, check if it's a 'real' root.
1056 1056 p = tuple(self.parents(n))
1057 1057 # If any of its parents are descendants, it's not a root.
1058 1058 if (p[0] in descendants) or (p[1] in descendants):
1059 1059 roots.remove(n)
1060 1060 else:
1061 1061 p = tuple(self.parents(n))
1062 1062 # A node is a descendant if either of its parents are
1063 1063 # descendants. (We seeded the descendants set with the roots
1064 1064 # up there, remember?)
1065 1065 if (p[0] in descendants) or (p[1] in descendants):
1066 1066 descendants.add(n)
1067 1067 isdescendant = True
1068 1068 if isdescendant and ((ancestors is None) or (n in ancestors)):
1069 1069 # Only include nodes that are both descendants and ancestors.
1070 1070 orderedout.append(n)
1071 1071 if (ancestors is not None) and (n in heads):
1072 1072 # We're trying to figure out which heads are reachable
1073 1073 # from roots.
1074 1074 # Mark this head as having been reached
1075 1075 heads[n] = True
1076 1076 elif ancestors is None:
1077 1077 # Otherwise, we're trying to discover the heads.
1078 1078 # Assume this is a head because if it isn't, the next step
1079 1079 # will eventually remove it.
1080 1080 heads[n] = True
1081 1081 # But, obviously its parents aren't.
1082 1082 for p in self.parents(n):
1083 1083 heads.pop(p, None)
1084 1084 heads = [head for head, flag in heads.iteritems() if flag]
1085 1085 roots = list(roots)
1086 1086 assert orderedout
1087 1087 assert roots
1088 1088 assert heads
1089 1089 return (orderedout, roots, heads)
1090 1090
1091 1091 def headrevs(self, revs=None):
1092 1092 if revs is None:
1093 1093 try:
1094 1094 return self.index.headrevs()
1095 1095 except AttributeError:
1096 1096 return self._headrevs()
1097 1097 if rustdagop is not None:
1098 1098 return rustdagop.headrevs(self.index, revs)
1099 1099 return dagop.headrevs(revs, self._uncheckedparentrevs)
1100 1100
1101 1101 def computephases(self, roots):
1102 1102 return self.index.computephasesmapsets(roots)
1103 1103
1104 1104 def _headrevs(self):
1105 1105 count = len(self)
1106 1106 if not count:
1107 1107 return [nullrev]
1108 1108 # we won't iter over filtered revs so nobody is a head at start
1109 1109 ishead = [0] * (count + 1)
1110 1110 index = self.index
1111 1111 for r in self:
1112 1112 ishead[r] = 1 # I may be a head
1113 1113 e = index[r]
1114 1114 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1115 1115 return [r for r, val in enumerate(ishead) if val]
1116 1116
1117 1117 def heads(self, start=None, stop=None):
1118 1118 """return the list of all nodes that have no children
1119 1119
1120 1120 if start is specified, only heads that are descendants of
1121 1121 start will be returned
1122 1122 if stop is specified, it will consider all the revs from stop
1123 1123 as if they had no children
1124 1124 """
1125 1125 if start is None and stop is None:
1126 1126 if not len(self):
1127 1127 return [nullid]
1128 1128 return [self.node(r) for r in self.headrevs()]
1129 1129
1130 1130 if start is None:
1131 1131 start = nullrev
1132 1132 else:
1133 1133 start = self.rev(start)
1134 1134
1135 1135 stoprevs = set(self.rev(n) for n in stop or [])
1136 1136
1137 1137 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1138 1138 stoprevs=stoprevs)
1139 1139
1140 1140 return [self.node(rev) for rev in revs]
1141 1141
1142 1142 def children(self, node):
1143 1143 """find the children of a given node"""
1144 1144 c = []
1145 1145 p = self.rev(node)
1146 1146 for r in self.revs(start=p + 1):
1147 1147 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1148 1148 if prevs:
1149 1149 for pr in prevs:
1150 1150 if pr == p:
1151 1151 c.append(self.node(r))
1152 1152 elif p == nullrev:
1153 1153 c.append(self.node(r))
1154 1154 return c
1155 1155
1156 1156 def commonancestorsheads(self, a, b):
1157 1157 """calculate all the heads of the common ancestors of nodes a and b"""
1158 1158 a, b = self.rev(a), self.rev(b)
1159 1159 ancs = self._commonancestorsheads(a, b)
1160 1160 return pycompat.maplist(self.node, ancs)
1161 1161
1162 1162 def _commonancestorsheads(self, *revs):
1163 1163 """calculate all the heads of the common ancestors of revs"""
1164 1164 try:
1165 1165 ancs = self.index.commonancestorsheads(*revs)
1166 1166 except (AttributeError, OverflowError): # C implementation failed
1167 1167 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1168 1168 return ancs
1169 1169
1170 1170 def isancestor(self, a, b):
1171 1171 """return True if node a is an ancestor of node b
1172 1172
1173 1173 A revision is considered an ancestor of itself."""
1174 1174 a, b = self.rev(a), self.rev(b)
1175 1175 return self.isancestorrev(a, b)
1176 1176
1177 1177 def isancestorrev(self, a, b):
1178 1178 """return True if revision a is an ancestor of revision b
1179 1179
1180 1180 A revision is considered an ancestor of itself.
1181 1181
1182 1182 The implementation of this is trivial but the use of
1183 1183 reachableroots is not."""
1184 1184 if a == nullrev:
1185 1185 return True
1186 1186 elif a == b:
1187 1187 return True
1188 1188 elif a > b:
1189 1189 return False
1190 1190 return bool(self.reachableroots(a, [b], [a], includepath=False))
1191 1191
1192 1192 def reachableroots(self, minroot, heads, roots, includepath=False):
1193 1193 """return (heads(::<roots> and <roots>::<heads>))
1194 1194
1195 1195 If includepath is True, return (<roots>::<heads>)."""
1196 1196 try:
1197 1197 return self.index.reachableroots2(minroot, heads, roots,
1198 1198 includepath)
1199 1199 except AttributeError:
1200 1200 return dagop._reachablerootspure(self.parentrevs,
1201 1201 minroot, roots, heads, includepath)
1202 1202
1203 1203 def ancestor(self, a, b):
1204 1204 """calculate the "best" common ancestor of nodes a and b"""
1205 1205
1206 1206 a, b = self.rev(a), self.rev(b)
1207 1207 try:
1208 1208 ancs = self.index.ancestors(a, b)
1209 1209 except (AttributeError, OverflowError):
1210 1210 ancs = ancestor.ancestors(self.parentrevs, a, b)
1211 1211 if ancs:
1212 1212 # choose a consistent winner when there's a tie
1213 1213 return min(map(self.node, ancs))
1214 1214 return nullid
1215 1215
1216 1216 def _match(self, id):
1217 1217 if isinstance(id, int):
1218 1218 # rev
1219 1219 return self.node(id)
1220 1220 if len(id) == 20:
1221 1221 # possibly a binary node
1222 1222 # odds of a binary node being all hex in ASCII are 1 in 10**25
1223 1223 try:
1224 1224 node = id
1225 1225 self.rev(node) # quick search the index
1226 1226 return node
1227 1227 except error.LookupError:
1228 1228 pass # may be partial hex id
1229 1229 try:
1230 1230 # str(rev)
1231 1231 rev = int(id)
1232 1232 if "%d" % rev != id:
1233 1233 raise ValueError
1234 1234 if rev < 0:
1235 1235 rev = len(self) + rev
1236 1236 if rev < 0 or rev >= len(self):
1237 1237 raise ValueError
1238 1238 return self.node(rev)
1239 1239 except (ValueError, OverflowError):
1240 1240 pass
1241 1241 if len(id) == 40:
1242 1242 try:
1243 1243 # a full hex nodeid?
1244 1244 node = bin(id)
1245 1245 self.rev(node)
1246 1246 return node
1247 1247 except (TypeError, error.LookupError):
1248 1248 pass
1249 1249
1250 1250 def _partialmatch(self, id):
1251 1251 # we don't care about wdirfilenodeids as they should always be full hashes
1252 1252 maybewdir = wdirhex.startswith(id)
1253 1253 try:
1254 1254 partial = self.index.partialmatch(id)
1255 1255 if partial and self.hasnode(partial):
1256 1256 if maybewdir:
1257 1257 # single 'ff...' match in radix tree, ambiguous with wdir
1258 1258 raise error.RevlogError
1259 1259 return partial
1260 1260 if maybewdir:
1261 1261 # no 'ff...' match in radix tree, wdir identified
1262 1262 raise error.WdirUnsupported
1263 1263 return None
1264 1264 except error.RevlogError:
1265 1265 # parsers.c radix tree lookup gave multiple matches
1266 1266 # fast path: for unfiltered changelog, radix tree is accurate
1267 1267 if not getattr(self, 'filteredrevs', None):
1268 1268 raise error.AmbiguousPrefixLookupError(
1269 1269 id, self.indexfile, _('ambiguous identifier'))
1270 1270 # fall through to slow path that filters hidden revisions
1271 1271 except (AttributeError, ValueError):
1272 1272 # we are pure python, or key was too short to search radix tree
1273 1273 pass
1274 1274
1275 1275 if id in self._pcache:
1276 1276 return self._pcache[id]
1277 1277
1278 1278 if len(id) <= 40:
1279 1279 try:
1280 1280 # hex(node)[:...]
1281 1281 l = len(id) // 2 # grab an even number of digits
1282 1282 prefix = bin(id[:l * 2])
1283 1283 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1284 1284 nl = [n for n in nl if hex(n).startswith(id) and
1285 1285 self.hasnode(n)]
1286 1286 if nullhex.startswith(id):
1287 1287 nl.append(nullid)
1288 1288 if len(nl) > 0:
1289 1289 if len(nl) == 1 and not maybewdir:
1290 1290 self._pcache[id] = nl[0]
1291 1291 return nl[0]
1292 1292 raise error.AmbiguousPrefixLookupError(
1293 1293 id, self.indexfile, _('ambiguous identifier'))
1294 1294 if maybewdir:
1295 1295 raise error.WdirUnsupported
1296 1296 return None
1297 1297 except TypeError:
1298 1298 pass
1299 1299
1300 1300 def lookup(self, id):
1301 1301 """locate a node based on:
1302 1302 - revision number or str(revision number)
1303 1303 - nodeid or subset of hex nodeid
1304 1304 """
1305 1305 n = self._match(id)
1306 1306 if n is not None:
1307 1307 return n
1308 1308 n = self._partialmatch(id)
1309 1309 if n:
1310 1310 return n
1311 1311
1312 1312 raise error.LookupError(id, self.indexfile, _('no match found'))
1313 1313
1314 1314 def shortest(self, node, minlength=1):
1315 1315 """Find the shortest unambiguous prefix that matches node."""
1316 1316 def isvalid(prefix):
1317 1317 try:
1318 1318 matchednode = self._partialmatch(prefix)
1319 1319 except error.AmbiguousPrefixLookupError:
1320 1320 return False
1321 1321 except error.WdirUnsupported:
1322 1322 # single 'ff...' match
1323 1323 return True
1324 1324 if matchednode is None:
1325 1325 raise error.LookupError(node, self.indexfile, _('no node'))
1326 1326 return True
1327 1327
1328 1328 def maybewdir(prefix):
1329 1329 return all(c == 'f' for c in pycompat.iterbytestr(prefix))
1330 1330
1331 1331 hexnode = hex(node)
1332 1332
1333 1333 def disambiguate(hexnode, minlength):
1334 1334 """Disambiguate against wdirid."""
1335 1335 for length in range(minlength, 41):
1336 1336 prefix = hexnode[:length]
1337 1337 if not maybewdir(prefix):
1338 1338 return prefix
1339 1339
1340 1340 if not getattr(self, 'filteredrevs', None):
1341 1341 try:
1342 1342 length = max(self.index.shortest(node), minlength)
1343 1343 return disambiguate(hexnode, length)
1344 1344 except error.RevlogError:
1345 1345 if node != wdirid:
1346 1346 raise error.LookupError(node, self.indexfile, _('no node'))
1347 1347 except AttributeError:
1348 1348 # Fall through to pure code
1349 1349 pass
1350 1350
1351 1351 if node == wdirid:
1352 1352 for length in range(minlength, 41):
1353 1353 prefix = hexnode[:length]
1354 1354 if isvalid(prefix):
1355 1355 return prefix
1356 1356
1357 1357 for length in range(minlength, 41):
1358 1358 prefix = hexnode[:length]
1359 1359 if isvalid(prefix):
1360 1360 return disambiguate(hexnode, length)
1361 1361
1362 1362 def cmp(self, node, text):
1363 1363 """compare text with a given file revision
1364 1364
1365 1365 returns True if text is different than what is stored.
1366 1366 """
1367 1367 p1, p2 = self.parents(node)
1368 1368 return storageutil.hashrevisionsha1(text, p1, p2) != node
1369 1369
1370 1370 def _cachesegment(self, offset, data):
1371 1371 """Add a segment to the revlog cache.
1372 1372
1373 1373 Accepts an absolute offset and the data that is at that location.
1374 1374 """
1375 1375 o, d = self._chunkcache
1376 1376 # try to add to existing cache
1377 1377 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1378 1378 self._chunkcache = o, d + data
1379 1379 else:
1380 1380 self._chunkcache = offset, data
1381 1381
1382 1382 def _readsegment(self, offset, length, df=None):
1383 1383 """Load a segment of raw data from the revlog.
1384 1384
1385 1385 Accepts an absolute offset, length to read, and an optional existing
1386 1386 file handle to read from.
1387 1387
1388 1388 If an existing file handle is passed, it will be seeked and the
1389 1389 original seek position will NOT be restored.
1390 1390
1391 1391 Returns a str or buffer of raw byte data.
1392 1392
1393 1393 Raises if the requested number of bytes could not be read.
1394 1394 """
1395 1395 # Cache data both forward and backward around the requested
1396 1396 # data, in a fixed size window. This helps speed up operations
1397 1397 # involving reading the revlog backwards.
1398 1398 cachesize = self._chunkcachesize
1399 1399 realoffset = offset & ~(cachesize - 1)
1400 1400 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1401 1401 - realoffset)
1402 1402 with self._datareadfp(df) as df:
1403 1403 df.seek(realoffset)
1404 1404 d = df.read(reallength)
1405 1405
1406 1406 self._cachesegment(realoffset, d)
1407 1407 if offset != realoffset or reallength != length:
1408 1408 startoffset = offset - realoffset
1409 1409 if len(d) - startoffset < length:
1410 1410 raise error.RevlogError(
1411 1411 _('partial read of revlog %s; expected %d bytes from '
1412 1412 'offset %d, got %d') %
1413 1413 (self.indexfile if self._inline else self.datafile,
1414 1414 length, realoffset, len(d) - startoffset))
1415 1415
1416 1416 return util.buffer(d, startoffset, length)
1417 1417
1418 1418 if len(d) < length:
1419 1419 raise error.RevlogError(
1420 1420 _('partial read of revlog %s; expected %d bytes from offset '
1421 1421 '%d, got %d') %
1422 1422 (self.indexfile if self._inline else self.datafile,
1423 1423 length, offset, len(d)))
1424 1424
1425 1425 return d
1426 1426
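The bit-twiddling above aligns every read to the chunk cache size, which the
revlog requires to be a power of two. A standalone check of the arithmetic,
with made-up numbers::

    cachesize = 65536                   # must be a power of two
    offset, length = 70000, 100         # requested byte range

    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)

    assert realoffset == 65536          # rounded down to a cache boundary
    assert reallength == 65536          # rounded up, covering the request
    assert realoffset <= offset
    assert offset + length <= realoffset + reallength
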
1427 1427 def _getsegment(self, offset, length, df=None):
1428 1428 """Obtain a segment of raw data from the revlog.
1429 1429
1430 1430 Accepts an absolute offset, length of bytes to obtain, and an
1431 1431 optional file handle to the already-opened revlog. If the file
1432 1432 handle is used, its original seek position will not be preserved.
1433 1433
1434 1434 Requests for data may be returned from a cache.
1435 1435
1436 1436 Returns a str or a buffer instance of raw byte data.
1437 1437 """
1438 1438 o, d = self._chunkcache
1439 1439 l = len(d)
1440 1440
1441 1441 # is it in the cache?
1442 1442 cachestart = offset - o
1443 1443 cacheend = cachestart + length
1444 1444 if cachestart >= 0 and cacheend <= l:
1445 1445 if cachestart == 0 and cacheend == l:
1446 1446 return d # avoid a copy
1447 1447 return util.buffer(d, cachestart, cacheend - cachestart)
1448 1448
1449 1449 return self._readsegment(offset, length, df=df)
1450 1450
1451 1451 def _getsegmentforrevs(self, startrev, endrev, df=None):
1452 1452 """Obtain a segment of raw data corresponding to a range of revisions.
1453 1453
1454 1454 Accepts the start and end revisions and an optional already-open
1455 1455 file handle to be used for reading. If the file handle is used, its
1456 1456 seek position will not be preserved.
1457 1457
1458 1458 Requests for data may be satisfied by a cache.
1459 1459
1460 1460 Returns a 2-tuple of (offset, data) for the requested range of
1461 1461 revisions. Offset is the integer offset from the beginning of the
1462 1462 revlog and data is a str or buffer of the raw byte data.
1463 1463
1464 1464 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1465 1465 to determine where each revision's data begins and ends.
1466 1466 """
1467 1467 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1468 1468 # (functions are expensive).
1469 1469 index = self.index
1470 1470 istart = index[startrev]
1471 1471 start = int(istart[0] >> 16)
1472 1472 if startrev == endrev:
1473 1473 end = start + istart[1]
1474 1474 else:
1475 1475 iend = index[endrev]
1476 1476 end = int(iend[0] >> 16) + iend[1]
1477 1477
1478 1478 if self._inline:
1479 1479 start += (startrev + 1) * self._io.size
1480 1480 end += (endrev + 1) * self._io.size
1481 1481 length = end - start
1482 1482
1483 1483 return start, self._getsegment(start, length, df=df)
1484 1484
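For inline revlogs, index entries and data chunks are interleaved in a single
file, so the chunk of revision r is preceded by r + 1 index entries as well as
all earlier chunks. A toy layout check with hypothetical sizes::

    iosize = 64                        # size of one index entry
    lengths = [10, 20]                 # on-disk chunk length per revision

    def inlinestart(rev):
        # each revision is preceded by its own index entry plus all
        # earlier entries and chunks
        return sum(lengths[:rev]) + (rev + 1) * iosize

    assert inlinestart(0) == 64              # entry 0, then chunk 0
    assert inlinestart(1) == 64 + 10 + 64    # entry 0, chunk 0, entry 1
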
1485 1485 def _chunk(self, rev, df=None):
1486 1486 """Obtain a single decompressed chunk for a revision.
1487 1487
1488 1488 Accepts an integer revision and an optional already-open file handle
1489 1489 to be used for reading. If used, the seek position of the file will not
1490 1490 be preserved.
1491 1491
1492 1492 Returns a str holding uncompressed data for the requested revision.
1493 1493 """
1494 1494 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1495 1495
1496 1496 def _chunks(self, revs, df=None, targetsize=None):
1497 1497 """Obtain decompressed chunks for the specified revisions.
1498 1498
1499 1499 Accepts an iterable of numeric revisions that are assumed to be in
1500 1500 ascending order. Also accepts an optional already-open file handle
1501 1501 to be used for reading. If used, the seek position of the file will
1502 1502 not be preserved.
1503 1503
1504 1504 This function is similar to calling ``self._chunk()`` multiple times,
1505 1505 but is faster.
1506 1506
1507 1507 Returns a list with decompressed data for each requested revision.
1508 1508 """
1509 1509 if not revs:
1510 1510 return []
1511 1511 start = self.start
1512 1512 length = self.length
1513 1513 inline = self._inline
1514 1514 iosize = self._io.size
1515 1515 buffer = util.buffer
1516 1516
1517 1517 l = []
1518 1518 ladd = l.append
1519 1519
1520 1520 if not self._withsparseread:
1521 1521 slicedchunks = (revs,)
1522 1522 else:
1523 1523 slicedchunks = deltautil.slicechunk(self, revs,
1524 1524 targetsize=targetsize)
1525 1525
1526 1526 for revschunk in slicedchunks:
1527 1527 firstrev = revschunk[0]
1528 1528 # Skip trailing revisions with empty diff
1529 1529 for lastrev in revschunk[::-1]:
1530 1530 if length(lastrev) != 0:
1531 1531 break
1532 1532
1533 1533 try:
1534 1534 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1535 1535 except OverflowError:
1536 1536 # issue4215 - we can't cache a run of chunks greater than
1537 1537 # 2G on Windows
1538 1538 return [self._chunk(rev, df=df) for rev in revschunk]
1539 1539
1540 1540 decomp = self.decompress
1541 1541 for rev in revschunk:
1542 1542 chunkstart = start(rev)
1543 1543 if inline:
1544 1544 chunkstart += (rev + 1) * iosize
1545 1545 chunklength = length(rev)
1546 1546 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1547 1547
1548 1548 return l
1549 1549
1550 1550 def _chunkclear(self):
1551 1551 """Clear the raw chunk cache."""
1552 1552 self._chunkcache = (0, '')
1553 1553
1554 1554 def deltaparent(self, rev):
1555 1555 """return deltaparent of the given revision"""
1556 1556 base = self.index[rev][3]
1557 1557 if base == rev:
1558 1558 return nullrev
1559 1559 elif self._generaldelta:
1560 1560 return base
1561 1561 else:
1562 1562 return rev - 1
1563 1563
1564 1564 def issnapshot(self, rev):
1565 1565 """tells whether rev is a snapshot
1566 1566 """
1567 1567 if not self._sparserevlog:
1568 1568 return self.deltaparent(rev) == nullrev
1569 1569 elif util.safehasattr(self.index, 'issnapshot'):
1570 1570 # directly assign the method to cache the testing and access
1571 1571 self.issnapshot = self.index.issnapshot
1572 1572 return self.issnapshot(rev)
1573 1573 if rev == nullrev:
1574 1574 return True
1575 1575 entry = self.index[rev]
1576 1576 base = entry[3]
1577 1577 if base == rev:
1578 1578 return True
1579 1579 if base == nullrev:
1580 1580 return True
1581 1581 p1 = entry[5]
1582 1582 p2 = entry[6]
1583 1583 if base == p1 or base == p2:
1584 1584 return False
1585 1585 return self.issnapshot(base)
1586 1586
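The recursion above can be restated compactly. A pure-Python sketch, assuming
``entry(rev)`` returns the (deltabase, p1, p2) revision numbers for rev::

    def issnapshot(entry, rev):
        if rev == -1:                   # nullrev is a (trivial) snapshot
            return True
        base, p1, p2 = entry(rev)
        if base == rev or base == -1:   # full text, or delta against null
            return True
        if base == p1 or base == p2:    # regular delta against a parent
            return False
        return issnapshot(entry, base)  # chain of intermediate snapshots
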
1587 1587 def snapshotdepth(self, rev):
1588 1588 """number of snapshot in the chain before this one"""
1589 1589 if not self.issnapshot(rev):
1590 1590 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1591 1591 return len(self._deltachain(rev)[0]) - 1
1592 1592
1593 1593 def revdiff(self, rev1, rev2):
1594 1594 """return or calculate a delta between two revisions
1595 1595
1596 1596 The delta calculated is in binary form and is intended to be written to
1597 1597 revlog data directly. So this function needs raw revision data.
1598 1598 """
1599 1599 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1600 1600 return bytes(self._chunk(rev2))
1601 1601
1602 1602 return mdiff.textdiff(self.rawdata(rev1),
1603 1603 self.rawdata(rev2))
1604 1604
1605 1605 def _processflags(self, text, flags, operation, raw=False):
1606 1606 """deprecated entry point to access flag processors"""
1607 1607 msg = ('_processflag(...) use the specialized variant')
1608 1608 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1609 1609 if raw:
1610 1610 return text, flagutil.processflagsraw(self, text, flags)
1611 1611 elif operation == 'read':
1612 1612 return flagutil.processflagsread(self, text, flags)
1613 1613 else: # write operation
1614 1614 return flagutil.processflagswrite(self, text, flags)
1615 1615
1616 1616 def revision(self, nodeorrev, _df=None, raw=False):
1617 1617 """return an uncompressed revision of a given node or revision
1618 1618 number.
1619 1619
1620 1620 _df - an existing file handle to read from. (internal-only)
1621 1621 raw - an optional argument specifying if the revision data is to be
1622 1622 treated as raw data when applying flag transforms. 'raw' should be set
1623 1623 to True when generating changegroups or in debug commands.
1624 1624 """
1625 1625 if raw:
1626 1626 msg = ('revlog.revision(..., raw=True) is deprecated, '
1627 1627 'use revlog.rawdata(...)')
1628 1628 util.nouideprecwarn(msg, '5.2', stacklevel=2)
1629 1629 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1630 1630
1631 1631 def sidedata(self, nodeorrev, _df=None):
1632 1632 """a map of extra data related to the changeset but not part of the hash
1633 1633
1634 1634 This function currently returns a dictionary. However, a more advanced
1635 1635 mapping object will likely be used in the future to allow more
1636 1636 efficient/lazy code.
1637 1637 """
1638 1638 return self._revisiondata(nodeorrev, _df)[1]
1639 1639
1640 1640 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1641 1641 # deal with <nodeorrev> argument type
1642 1642 if isinstance(nodeorrev, int):
1643 1643 rev = nodeorrev
1644 1644 node = self.node(rev)
1645 1645 else:
1646 1646 node = nodeorrev
1647 1647 rev = None
1648 1648
1649 1649 # fast path the special `nullid` rev
1650 1650 if node == nullid:
1651 1651 return "", {}
1652 1652
1653 1653 # The text as stored inside the revlog. Might be the revision or might
1654 1654 # need to be processed to retrieve the revision.
1655 1655 rawtext = None
1656 1656
1657 1657 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1658 1658
1659 1659 if raw and validated:
1660 1660 # if we don't want to process the raw text and that raw
1661 1661 # text is cached, we can exit early.
1662 1662 return rawtext, {}
1663 1663 if rev is None:
1664 1664 rev = self.rev(node)
1665 1665 # the revlog's flags for this revision
1666 1666 # (they usually alter its state or content)
1667 1667 flags = self.flags(rev)
1668 1668
1669 1669 if validated and flags == REVIDX_DEFAULT_FLAGS:
1670 1670 # no extra flags set, no flag processor runs, text = rawtext
1671 1671 return rawtext, {}
1672 1672
1673 1673 sidedata = {}
1674 1674 if raw:
1675 1675 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1676 1676 text = rawtext
1677 1677 else:
1678 r = flagutil.processflagsread(self, rawtext, flags)
1678 try:
1679 r = flagutil.processflagsread(self, rawtext, flags)
1680 except error.SidedataHashError as exc:
1681 msg = _("integrity check failed on %s:%s sidedata key %d")
1682 msg %= (self.indexfile, pycompat.bytestr(rev),
1683 exc.sidedatakey)
1684 raise error.RevlogError(msg)
1679 1685 text, validatehash, sidedata = r
1680 1686 if validatehash:
1681 1687 self.checkhash(text, node, rev=rev)
1682 1688 if not validated:
1683 1689 self._revisioncache = (node, rev, rawtext)
1684 1690
1685 1691 return text, sidedata
1686 1692
1687 1693 def _rawtext(self, node, rev, _df=None):
1688 1694 """return the possibly unvalidated rawtext for a revision
1689 1695
1690 1696 returns (rev, rawtext, validated)
1691 1697 """
1692 1698
1693 1699 # revision in the cache (could be useful to apply delta)
1694 1700 cachedrev = None
1695 1701 # An intermediate text to apply deltas to
1696 1702 basetext = None
1697 1703
1698 1704 # Check if we have the entry in cache
1699 1705 # The cache entry looks like (node, rev, rawtext)
1700 1706 if self._revisioncache:
1701 1707 if self._revisioncache[0] == node:
1702 1708 return (rev, self._revisioncache[2], True)
1703 1709 cachedrev = self._revisioncache[1]
1704 1710
1705 1711 if rev is None:
1706 1712 rev = self.rev(node)
1707 1713
1708 1714 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1709 1715 if stopped:
1710 1716 basetext = self._revisioncache[2]
1711 1717
1712 1718 # drop cache to save memory, the caller is expected to
1713 1719 # update self._revisioncache after validating the text
1714 1720 self._revisioncache = None
1715 1721
1716 1722 targetsize = None
1717 1723 rawsize = self.index[rev][2]
1718 1724 if 0 <= rawsize:
1719 1725 targetsize = 4 * rawsize
1720 1726
1721 1727 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1722 1728 if basetext is None:
1723 1729 basetext = bytes(bins[0])
1724 1730 bins = bins[1:]
1725 1731
1726 1732 rawtext = mdiff.patches(basetext, bins)
1727 1733 del basetext # let us have a chance to free memory early
1728 1734 return (rev, rawtext, False)
1729 1735
1730 1736 def rawdata(self, nodeorrev, _df=None):
1731 1737 """return an uncompressed raw data of a given node or revision number.
1732 1738
1733 1739 _df - an existing file handle to read from. (internal-only)
1734 1740 """
1735 1741 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1736 1742
1737 1743 def hash(self, text, p1, p2):
1738 1744 """Compute a node hash.
1739 1745
1740 1746 Available as a function so that subclasses can replace the hash
1741 1747 as needed.
1742 1748 """
1743 1749 return storageutil.hashrevisionsha1(text, p1, p2)
1744 1750
1745 1751 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1746 1752 """Check node hash integrity.
1747 1753
1748 1754 Available as a function so that subclasses can extend hash mismatch
1749 1755 behaviors as needed.
1750 1756 """
1751 1757 try:
1752 1758 if p1 is None and p2 is None:
1753 1759 p1, p2 = self.parents(node)
1754 1760 if node != self.hash(text, p1, p2):
1755 1761 # Clear the revision cache on hash failure. The revision cache
1756 1762 # only stores the raw revision and clearing the cache does have
1757 1763 # the side-effect that we won't have a cache hit when the raw
1758 1764 # revision data is accessed. But this case should be rare and
1759 1765 # it is extra work to teach the cache about the hash
1760 1766 # verification state.
1761 1767 if self._revisioncache and self._revisioncache[0] == node:
1762 1768 self._revisioncache = None
1763 1769
1764 1770 revornode = rev
1765 1771 if revornode is None:
1766 1772 revornode = templatefilters.short(hex(node))
1767 1773 raise error.RevlogError(_("integrity check failed on %s:%s")
1768 1774 % (self.indexfile, pycompat.bytestr(revornode)))
1769 1775 except error.RevlogError:
1770 1776 if self._censorable and storageutil.iscensoredtext(text):
1771 1777 raise error.CensoredNodeError(self.indexfile, node, text)
1772 1778 raise
1773 1779
1774 1780 def _enforceinlinesize(self, tr, fp=None):
1775 1781 """Check if the revlog is too big for inline and convert if so.
1776 1782
1777 1783 This should be called after revisions are added to the revlog. If the
1778 1784 revlog has grown too large to be an inline revlog, it will convert it
1779 1785 to use multiple index and data files.
1780 1786 """
1781 1787 tiprev = len(self) - 1
1782 1788 if (not self._inline or
1783 1789 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1784 1790 return
1785 1791
1786 1792 trinfo = tr.find(self.indexfile)
1787 1793 if trinfo is None:
1788 1794 raise error.RevlogError(_("%s not found in the transaction")
1789 1795 % self.indexfile)
1790 1796
1791 1797 trindex = trinfo[2]
1792 1798 if trindex is not None:
1793 1799 dataoff = self.start(trindex)
1794 1800 else:
1795 1801 # revlog was stripped at start of transaction, use all leftover data
1796 1802 trindex = len(self) - 1
1797 1803 dataoff = self.end(tiprev)
1798 1804
1799 1805 tr.add(self.datafile, dataoff)
1800 1806
1801 1807 if fp:
1802 1808 fp.flush()
1803 1809 fp.close()
1804 1810 # We can't use the cached file handle after close(). So prevent
1805 1811 # its usage.
1806 1812 self._writinghandles = None
1807 1813
1808 1814 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1809 1815 for r in self:
1810 1816 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1811 1817
1812 1818 with self._indexfp('w') as fp:
1813 1819 self.version &= ~FLAG_INLINE_DATA
1814 1820 self._inline = False
1815 1821 io = self._io
1816 1822 for i in self:
1817 1823 e = io.packentry(self.index[i], self.node, self.version, i)
1818 1824 fp.write(e)
1819 1825
1820 1826 # the temp file replaces the real index when we exit the context
1821 1827 # manager
1822 1828
1823 1829 tr.replace(self.indexfile, trindex * self._io.size)
1824 1830 self._chunkclear()
1825 1831
1826 1832 def _nodeduplicatecallback(self, transaction, node):
1827 1833 """called when trying to add a node already stored.
1828 1834 """
1829 1835
1830 1836 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1831 1837 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None,
1832 1838 sidedata=None):
1833 1839 """add a revision to the log
1834 1840
1835 1841 text - the revision data to add
1836 1842 transaction - the transaction object used for rollback
1837 1843 link - the linkrev data to add
1838 1844 p1, p2 - the parent nodeids of the revision
1839 1845 cachedelta - an optional precomputed delta
1840 1846 node - nodeid of revision; typically node is not specified, and it is
1841 1847 computed by default as hash(text, p1, p2); however, subclasses might
1842 1848 use a different hashing method (and override checkhash() in that case)
1843 1849 flags - the known flags to set on the revision
1844 1850 deltacomputer - an optional deltacomputer instance shared between
1845 1851 multiple calls
1846 1852 """
1847 1853 if link == nullrev:
1848 1854 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1849 1855 % self.indexfile)
1850 1856
1851 1857 if sidedata is None:
1852 1858 sidedata = {}
1853 1859 elif not self.hassidedata:
1854 1860 raise error.ProgrammingError(
1855 1861 _("trying to add sidedata to a revlog who don't support them")
1856 1862 )
1857 1863
1858 1864 if flags:
1859 1865 node = node or self.hash(text, p1, p2)
1860 1866
1861 1867 rawtext, validatehash = flagutil.processflagswrite(self, text, flags,
1862 1868 sidedata=sidedata)
1863 1869
1864 1870 # If the flag processor modifies the revision data, ignore any provided
1865 1871 # cachedelta.
1866 1872 if rawtext != text:
1867 1873 cachedelta = None
1868 1874
1869 1875 if len(rawtext) > _maxentrysize:
1870 1876 raise error.RevlogError(
1871 1877 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1872 1878 % (self.indexfile, len(rawtext)))
1873 1879
1874 1880 node = node or self.hash(rawtext, p1, p2)
1875 1881 if node in self.nodemap:
1876 1882 return node
1877 1883
1878 1884 if validatehash:
1879 1885 self.checkhash(rawtext, node, p1=p1, p2=p2)
1880 1886
1881 1887 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1882 1888 flags, cachedelta=cachedelta,
1883 1889 deltacomputer=deltacomputer)
1884 1890
1885 1891 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1886 1892 cachedelta=None, deltacomputer=None):
1887 1893 """add a raw revision with known flags, node and parents
1888 1894 useful when reusing a revision not stored in this revlog (ex: received
1889 1895 over the wire, or read from an external bundle).
1890 1896 """
1891 1897 dfh = None
1892 1898 if not self._inline:
1893 1899 dfh = self._datafp("a+")
1894 1900 ifh = self._indexfp("a+")
1895 1901 try:
1896 1902 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1897 1903 flags, cachedelta, ifh, dfh,
1898 1904 deltacomputer=deltacomputer)
1899 1905 finally:
1900 1906 if dfh:
1901 1907 dfh.close()
1902 1908 ifh.close()
1903 1909
1904 1910 def compress(self, data):
1905 1911 """Generate a possibly-compressed representation of data."""
1906 1912 if not data:
1907 1913 return '', data
1908 1914
1909 1915 compressed = self._compressor.compress(data)
1910 1916
1911 1917 if compressed:
1912 1918 # The revlog compressor added the header in the returned data.
1913 1919 return '', compressed
1914 1920
1915 1921 if data[0:1] == '\0':
1916 1922 return '', data
1917 1923 return 'u', data
1918 1924
1919 1925 def decompress(self, data):
1920 1926 """Decompress a revlog chunk.
1921 1927
1922 1928 The chunk is expected to begin with a header identifying the
1923 1929 format type so it can be routed to an appropriate decompressor.
1924 1930 """
1925 1931 if not data:
1926 1932 return data
1927 1933
1928 1934 # Revlogs are read much more frequently than they are written and many
1929 1935 # chunks only take microseconds to decompress, so performance is
1930 1936 # important here.
1931 1937 #
1932 1938 # We can make a few assumptions about revlogs:
1933 1939 #
1934 1940 # 1) the majority of chunks will be compressed (as opposed to inline
1935 1941 # raw data).
1936 1942 # 2) decompressing *any* data will likely be at least 10x slower than
1937 1943 # returning raw inline data.
1938 1944 # 3) we want to prioritize common and officially supported compression
1939 1945 # engines
1940 1946 #
1941 1947 # It follows that we want to optimize for "decompress compressed data
1942 1948 # when encoded with common and officially supported compression engines"
1943 1949 # case over "raw data" and "data encoded by less common or non-official
1944 1950 # compression engines." That is why we have the inline lookup first
1945 1951 # followed by the compengines lookup.
1946 1952 #
1947 1953 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1948 1954 # compressed chunks. And this matters for changelog and manifest reads.
1949 1955 t = data[0:1]
1950 1956
1951 1957 if t == 'x':
1952 1958 try:
1953 1959 return _zlibdecompress(data)
1954 1960 except zlib.error as e:
1955 1961 raise error.RevlogError(_('revlog decompress error: %s') %
1956 1962 stringutil.forcebytestr(e))
1957 1963 # '\0' is more common than 'u' so it goes first.
1958 1964 elif t == '\0':
1959 1965 return data
1960 1966 elif t == 'u':
1961 1967 return util.buffer(data, 1)
1962 1968
1963 1969 try:
1964 1970 compressor = self._decompressors[t]
1965 1971 except KeyError:
1966 1972 try:
1967 1973 engine = util.compengines.forrevlogheader(t)
1968 1974 compressor = engine.revlogcompressor(self._compengineopts)
1969 1975 self._decompressors[t] = compressor
1970 1976 except KeyError:
1971 1977 raise error.RevlogError(_('unknown compression type %r') % t)
1972 1978
1973 1979 return compressor.decompress(data)
1974 1980
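The header convention is easy to exercise on its own. A simplified round-trip
sketch; unlike the real compress(), which returns the header separately from
the payload, this toy version concatenates the two::

    import zlib

    def toycompress(data):
        if not data:
            return data
        compressed = zlib.compress(data)   # zlib output starts with 'x'
        if len(compressed) < len(data):
            return compressed
        if data[0:1] == b'\0':
            return data                    # '\0' chunks are stored verbatim
        return b'u' + data                 # tagged as uncompressed

    def toydecompress(chunk):
        t = chunk[0:1]
        if t == b'x':
            return zlib.decompress(chunk)
        if t == b'\0' or not chunk:
            return chunk
        if t == b'u':
            return chunk[1:]
        raise ValueError('unknown compression type %r' % t)

    data = b'some revision data' * 100
    assert toydecompress(toycompress(data)) == data
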
1975 1981 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1976 1982 cachedelta, ifh, dfh, alwayscache=False,
1977 1983 deltacomputer=None):
1978 1984 """internal function to add revisions to the log
1979 1985
1980 1986 see addrevision for argument descriptions.
1981 1987
1982 1988 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1983 1989
1984 1990 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1985 1991 be used.
1986 1992
1987 1993 invariants:
1988 1994 - rawtext is optional (can be None); if not set, cachedelta must be set.
1989 1995 if both are set, they must correspond to each other.
1990 1996 """
1991 1997 if node == nullid:
1992 1998 raise error.RevlogError(_("%s: attempt to add null revision") %
1993 1999 self.indexfile)
1994 2000 if node == wdirid or node in wdirfilenodeids:
1995 2001 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1996 2002 self.indexfile)
1997 2003
1998 2004 if self._inline:
1999 2005 fh = ifh
2000 2006 else:
2001 2007 fh = dfh
2002 2008
2003 2009 btext = [rawtext]
2004 2010
2005 2011 curr = len(self)
2006 2012 prev = curr - 1
2007 2013 offset = self.end(prev)
2008 2014 p1r, p2r = self.rev(p1), self.rev(p2)
2009 2015
2010 2016 # full versions are inserted when the needed deltas
2011 2017 # become comparable to the uncompressed text
2012 2018 if rawtext is None:
2013 2019 # need rawtext size, before changed by flag processors, which is
2014 2020 # the non-raw size. use revlog explicitly to avoid filelog's extra
2015 2021 # logic that might remove metadata size.
2016 2022 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
2017 2023 cachedelta[1])
2018 2024 else:
2019 2025 textlen = len(rawtext)
2020 2026
2021 2027 if deltacomputer is None:
2022 2028 deltacomputer = deltautil.deltacomputer(self)
2023 2029
2024 2030 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2025 2031
2026 2032 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2027 2033
2028 2034 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
2029 2035 deltainfo.base, link, p1r, p2r, node)
2030 2036 self.index.append(e)
2031 2037 self.nodemap[node] = curr
2032 2038
2033 2039 # Reset the pure node cache start lookup offset to account for new
2034 2040 # revision.
2035 2041 if self._nodepos is not None:
2036 2042 self._nodepos = curr
2037 2043
2038 2044 entry = self._io.packentry(e, self.node, self.version, curr)
2039 2045 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2040 2046 link, offset)
2041 2047
2042 2048 rawtext = btext[0]
2043 2049
2044 2050 if alwayscache and rawtext is None:
2045 2051 rawtext = deltacomputer.buildtext(revinfo, fh)
2046 2052
2047 2053 if type(rawtext) == bytes: # only accept immutable objects
2048 2054 self._revisioncache = (node, curr, rawtext)
2049 2055 self._chainbasecache[curr] = deltainfo.chainbase
2050 2056 return node
2051 2057
2052 2058 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2053 2059 # Files opened in a+ mode have inconsistent behavior on various
2054 2060 # platforms. Windows requires that a file positioning call be made
2055 2061 # when the file handle transitions between reads and writes. See
2056 2062 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2057 2063 # platforms, Python or the platform itself can be buggy. Some versions
2058 2064 # of Solaris have been observed to not append at the end of the file
2059 2065 # if the file was seeked to before the end. See issue4943 for more.
2060 2066 #
2061 2067 # We work around this issue by inserting a seek() before writing.
2062 2068 # Note: This is likely not necessary on Python 3. However, because
2063 2069 # the file handle is reused for reads and may be seeked there, we need
2064 2070 # to be careful before changing this.
2065 2071 ifh.seek(0, os.SEEK_END)
2066 2072 if dfh:
2067 2073 dfh.seek(0, os.SEEK_END)
2068 2074
2069 2075 curr = len(self) - 1
2070 2076 if not self._inline:
2071 2077 transaction.add(self.datafile, offset)
2072 2078 transaction.add(self.indexfile, curr * len(entry))
2073 2079 if data[0]:
2074 2080 dfh.write(data[0])
2075 2081 dfh.write(data[1])
2076 2082 ifh.write(entry)
2077 2083 else:
2078 2084 offset += curr * self._io.size
2079 2085 transaction.add(self.indexfile, offset, curr)
2080 2086 ifh.write(entry)
2081 2087 ifh.write(data[0])
2082 2088 ifh.write(data[1])
2083 2089 self._enforceinlinesize(transaction, ifh)
2084 2090
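The workaround boils down to repositioning explicitly before every append. A
minimal standalone illustration (the file name is hypothetical)::

    import os

    with open('example.dat', 'a+b') as fh:
        fh.read()                  # the handle may have been used for reads
        fh.seek(0, os.SEEK_END)    # reposition explicitly before appending
        fh.write(b'new entry')
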
2085 2091 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2086 2092 """
2087 2093 add a delta group
2088 2094
2089 2095 given a set of deltas, add them to the revision log. the
2090 2096 first delta is against its parent, which should be in our
2091 2097 log, the rest are against the previous delta.
2092 2098
2093 2099 If ``addrevisioncb`` is defined, it will be called with arguments of
2094 2100 this revlog and the node that was added.
2095 2101 """
2096 2102
2097 2103 if self._writinghandles:
2098 2104 raise error.ProgrammingError('cannot nest addgroup() calls')
2099 2105
2100 2106 nodes = []
2101 2107
2102 2108 r = len(self)
2103 2109 end = 0
2104 2110 if r:
2105 2111 end = self.end(r - 1)
2106 2112 ifh = self._indexfp("a+")
2107 2113 isize = r * self._io.size
2108 2114 if self._inline:
2109 2115 transaction.add(self.indexfile, end + isize, r)
2110 2116 dfh = None
2111 2117 else:
2112 2118 transaction.add(self.indexfile, isize, r)
2113 2119 transaction.add(self.datafile, end)
2114 2120 dfh = self._datafp("a+")
2115 2121 def flush():
2116 2122 if dfh:
2117 2123 dfh.flush()
2118 2124 ifh.flush()
2119 2125
2120 2126 self._writinghandles = (ifh, dfh)
2121 2127
2122 2128 try:
2123 2129 deltacomputer = deltautil.deltacomputer(self)
2124 2130 # loop through our set of deltas
2125 2131 for data in deltas:
2126 2132 node, p1, p2, linknode, deltabase, delta, flags = data
2127 2133 link = linkmapper(linknode)
2128 2134 flags = flags or REVIDX_DEFAULT_FLAGS
2129 2135
2130 2136 nodes.append(node)
2131 2137
2132 2138 if node in self.nodemap:
2133 2139 self._nodeduplicatecallback(transaction, node)
2134 2140 # this can happen if two branches make the same change
2135 2141 continue
2136 2142
2137 2143 for p in (p1, p2):
2138 2144 if p not in self.nodemap:
2139 2145 raise error.LookupError(p, self.indexfile,
2140 2146 _('unknown parent'))
2141 2147
2142 2148 if deltabase not in self.nodemap:
2143 2149 raise error.LookupError(deltabase, self.indexfile,
2144 2150 _('unknown delta base'))
2145 2151
2146 2152 baserev = self.rev(deltabase)
2147 2153
2148 2154 if baserev != nullrev and self.iscensored(baserev):
2149 2155 # if base is censored, delta must be full replacement in a
2150 2156 # single patch operation
2151 2157 hlen = struct.calcsize(">lll")
2152 2158 oldlen = self.rawsize(baserev)
2153 2159 newlen = len(delta) - hlen
2154 2160 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2155 2161 raise error.CensoredBaseError(self.indexfile,
2156 2162 self.node(baserev))
2157 2163
2158 2164 if not flags and self._peek_iscensored(baserev, delta, flush):
2159 2165 flags |= REVIDX_ISCENSORED
2160 2166
2161 2167 # We assume consumers of addrevisioncb will want to retrieve
2162 2168 # the added revision, which will require a call to
2163 2169 # revision(). revision() will fast path if there is a cache
2164 2170 # hit. So, we tell _addrevision() to always cache in this case.
2165 2171 # We're only using addgroup() in the context of changegroup
2166 2172 # generation so the revision data can always be handled as raw
2167 2173 # by the flagprocessor.
2168 2174 self._addrevision(node, None, transaction, link,
2169 2175 p1, p2, flags, (baserev, delta),
2170 2176 ifh, dfh,
2171 2177 alwayscache=bool(addrevisioncb),
2172 2178 deltacomputer=deltacomputer)
2173 2179
2174 2180 if addrevisioncb:
2175 2181 addrevisioncb(self, node)
2176 2182
2177 2183 if not dfh and not self._inline:
2178 2184 # addrevision switched from inline to conventional
2179 2185 # reopen the index
2180 2186 ifh.close()
2181 2187 dfh = self._datafp("a+")
2182 2188 ifh = self._indexfp("a+")
2183 2189 self._writinghandles = (ifh, dfh)
2184 2190 finally:
2185 2191 self._writinghandles = None
2186 2192
2187 2193 if dfh:
2188 2194 dfh.close()
2189 2195 ifh.close()
2190 2196
2191 2197 return nodes
2192 2198
2193 2199 def iscensored(self, rev):
2194 2200 """Check if a file revision is censored."""
2195 2201 if not self._censorable:
2196 2202 return False
2197 2203
2198 2204 return self.flags(rev) & REVIDX_ISCENSORED
2199 2205
2200 2206 def _peek_iscensored(self, baserev, delta, flush):
2201 2207 """Quickly check if a delta produces a censored revision."""
2202 2208 if not self._censorable:
2203 2209 return False
2204 2210
2205 2211 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2206 2212
2207 2213 def getstrippoint(self, minlink):
2208 2214 """find the minimum rev that must be stripped to strip the linkrev
2209 2215
2210 2216 Returns a tuple containing the minimum rev and a set of all revs that
2211 2217 have linkrevs that will be broken by this strip.
2212 2218 """
2213 2219 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2214 2220 self.headrevs(),
2215 2221 self.linkrev, self.parentrevs)
2216 2222
2217 2223 def strip(self, minlink, transaction):
2218 2224 """truncate the revlog on the first revision with a linkrev >= minlink
2219 2225
2220 2226 This function is called when we're stripping revision minlink and
2221 2227 its descendants from the repository.
2222 2228
2223 2229 We have to remove all revisions with linkrev >= minlink, because
2224 2230 the equivalent changelog revisions will be renumbered after the
2225 2231 strip.
2226 2232
2227 2233 So we truncate the revlog on the first of these revisions, and
2228 2234 trust that the caller has saved the revisions that shouldn't be
2229 2235 removed and that it'll re-add them after this truncation.
2230 2236 """
2231 2237 if len(self) == 0:
2232 2238 return
2233 2239
2234 2240 rev, _ = self.getstrippoint(minlink)
2235 2241 if rev == len(self):
2236 2242 return
2237 2243
2238 2244 # first truncate the files on disk
2239 2245 end = self.start(rev)
2240 2246 if not self._inline:
2241 2247 transaction.add(self.datafile, end)
2242 2248 end = rev * self._io.size
2243 2249 else:
2244 2250 end += rev * self._io.size
2245 2251
2246 2252 transaction.add(self.indexfile, end)
2247 2253
2248 2254 # then reset internal state in memory to forget those revisions
2249 2255 self._revisioncache = None
2250 2256 self._chaininfocache = {}
2251 2257 self._chunkclear()
2252 2258 for x in pycompat.xrange(rev, len(self)):
2253 2259 del self.nodemap[self.node(x)]
2254 2260
2255 2261 del self.index[rev:-1]
2256 2262 self._nodepos = None
2257 2263
2258 2264 def checksize(self):
2259 2265 """Check size of index and data files
2260 2266
2261 2267 return a (dd, di) tuple.
2262 2268 - dd: extra bytes for the "data" file
2263 2269 - di: extra bytes for the "index" file
2264 2270
2265 2271 A healthy revlog will return (0, 0).
2266 2272 """
2267 2273 expected = 0
2268 2274 if len(self):
2269 2275 expected = max(0, self.end(len(self) - 1))
2270 2276
2271 2277 try:
2272 2278 with self._datafp() as f:
2273 2279 f.seek(0, io.SEEK_END)
2274 2280 actual = f.tell()
2275 2281 dd = actual - expected
2276 2282 except IOError as inst:
2277 2283 if inst.errno != errno.ENOENT:
2278 2284 raise
2279 2285 dd = 0
2280 2286
2281 2287 try:
2282 2288 f = self.opener(self.indexfile)
2283 2289 f.seek(0, io.SEEK_END)
2284 2290 actual = f.tell()
2285 2291 f.close()
2286 2292 s = self._io.size
2287 2293 i = max(0, actual // s)
2288 2294 di = actual - (i * s)
2289 2295 if self._inline:
2290 2296 databytes = 0
2291 2297 for r in self:
2292 2298 databytes += max(0, self.length(r))
2293 2299 dd = 0
2294 2300 di = actual - len(self) * s - databytes
2295 2301 except IOError as inst:
2296 2302 if inst.errno != errno.ENOENT:
2297 2303 raise
2298 2304 di = 0
2299 2305
2300 2306 return (dd, di)
2301 2307
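The accounting is straightforward in the non-inline case. A toy computation
with made-up sizes::

    entrysize = 64                     # fixed index entry size
    expecteddata = 100                 # end offset of the last revision
    actualdata = 100                   # observed size of the data file
    actualindex = 3 * entrysize + 5    # index file with 5 stray bytes

    dd = actualdata - expecteddata     # extra bytes in the data file
    di = actualindex - (actualindex // entrysize) * entrysize

    assert (dd, di) == (0, 5)          # a healthy revlog yields (0, 0)
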
2302 2308 def files(self):
2303 2309 res = [self.indexfile]
2304 2310 if not self._inline:
2305 2311 res.append(self.datafile)
2306 2312 return res
2307 2313
2308 2314 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2309 2315 assumehaveparentrevisions=False,
2310 2316 deltamode=repository.CG_DELTAMODE_STD):
2311 2317 if nodesorder not in ('nodes', 'storage', 'linear', None):
2312 2318 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2313 2319 nodesorder)
2314 2320
2315 2321 if nodesorder is None and not self._generaldelta:
2316 2322 nodesorder = 'storage'
2317 2323
2318 2324 if (not self._storedeltachains and
2319 2325 deltamode != repository.CG_DELTAMODE_PREV):
2320 2326 deltamode = repository.CG_DELTAMODE_FULL
2321 2327
2322 2328 return storageutil.emitrevisions(
2323 2329 self, nodes, nodesorder, revlogrevisiondelta,
2324 2330 deltaparentfn=self.deltaparent,
2325 2331 candeltafn=self.candelta,
2326 2332 rawsizefn=self.rawsize,
2327 2333 revdifffn=self.revdiff,
2328 2334 flagsfn=self.flags,
2329 2335 deltamode=deltamode,
2330 2336 revisiondata=revisiondata,
2331 2337 assumehaveparentrevisions=assumehaveparentrevisions)
2332 2338
2333 2339 DELTAREUSEALWAYS = 'always'
2334 2340 DELTAREUSESAMEREVS = 'samerevs'
2335 2341 DELTAREUSENEVER = 'never'
2336 2342
2337 2343 DELTAREUSEFULLADD = 'fulladd'
2338 2344
2339 2345 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2340 2346
2341 2347 def clone(self, tr, destrevlog, addrevisioncb=None,
2342 2348 deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
2343 2349 """Copy this revlog to another, possibly with format changes.
2344 2350
2345 2351 The destination revlog will contain the same revisions and nodes.
2346 2352 However, it may not be bit-for-bit identical due to e.g. delta encoding
2347 2353 differences.
2348 2354
2349 2355 The ``deltareuse`` argument controls how deltas from the existing revlog
2350 2356 are preserved in the destination revlog. The argument can have the
2351 2357 following values:
2352 2358
2353 2359 DELTAREUSEALWAYS
2354 2360 Deltas will always be reused (if possible), even if the destination
2355 2361 revlog would not select the same revisions for the delta. This is the
2356 2362 fastest mode of operation.
2357 2363 DELTAREUSESAMEREVS
2358 2364 Deltas will be reused if the destination revlog would pick the same
2359 2365 revisions for the delta. This mode strikes a balance between speed
2360 2366 and optimization.
2361 2367 DELTAREUSENEVER
2362 2368 Deltas will never be reused. This is the slowest mode of execution.
2363 2369 This mode can be used to recompute deltas (e.g. if the diff/delta
2364 2370 algorithm changes).
2365 2371 DELTAREUSEFULLADD
2366 2372 Revisions will be re-added as if they were new content. This is
2367 2373 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2368 2374 e.g. large file detection and handling.
2369 2375
2370 2376 Delta computation can be slow, so the choice of delta reuse policy can
2371 2377 significantly affect run time.
2372 2378
2373 2379 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2374 2380 two extremes. Deltas will be reused if they are appropriate. But if the
2375 2381 delta could choose a better revision, it will do so. This means if you
2376 2382 are converting a non-generaldelta revlog to a generaldelta revlog,
2377 2383 deltas will be recomputed if the delta's parent isn't a parent of the
2378 2384 revision.
2379 2385
2380 2386 In addition to the delta policy, the ``forcedeltabothparents``
2381 2387 argument controls whether to force computing deltas against both parents
2382 2388 for merges. By default, the destination revlog's existing setting is kept.
2383 2389 """
2384 2390 if deltareuse not in self.DELTAREUSEALL:
2385 2391 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2386 2392
2387 2393 if len(destrevlog):
2388 2394 raise ValueError(_('destination revlog is not empty'))
2389 2395
2390 2396 if getattr(self, 'filteredrevs', None):
2391 2397 raise ValueError(_('source revlog has filtered revisions'))
2392 2398 if getattr(destrevlog, 'filteredrevs', None):
2393 2399 raise ValueError(_('destination revlog has filtered revisions'))
2394 2400
2395 2401 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2396 2402 # if possible.
2397 2403 oldlazydelta = destrevlog._lazydelta
2398 2404 oldlazydeltabase = destrevlog._lazydeltabase
2399 2405 oldamd = destrevlog._deltabothparents
2400 2406
2401 2407 try:
2402 2408 if deltareuse == self.DELTAREUSEALWAYS:
2403 2409 destrevlog._lazydeltabase = True
2404 2410 destrevlog._lazydelta = True
2405 2411 elif deltareuse == self.DELTAREUSESAMEREVS:
2406 2412 destrevlog._lazydeltabase = False
2407 2413 destrevlog._lazydelta = True
2408 2414 elif deltareuse == self.DELTAREUSENEVER:
2409 2415 destrevlog._lazydeltabase = False
2410 2416 destrevlog._lazydelta = False
2411 2417
2412 2418 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2413 2419
2414 2420 self._clone(tr, destrevlog, addrevisioncb, deltareuse,
2415 2421 forcedeltabothparents)
2416 2422
2417 2423 finally:
2418 2424 destrevlog._lazydelta = oldlazydelta
2419 2425 destrevlog._lazydeltabase = oldlazydeltabase
2420 2426 destrevlog._deltabothparents = oldamd
2421 2427
2422 2428 def _clone(self, tr, destrevlog, addrevisioncb, deltareuse,
2423 2429 forcedeltabothparents):
2424 2430 """perform the core duty of `revlog.clone` after parameter processing"""
2425 2431 deltacomputer = deltautil.deltacomputer(destrevlog)
2426 2432 index = self.index
2427 2433 for rev in self:
2428 2434 entry = index[rev]
2429 2435
2430 2436 # Some classes override linkrev to take filtered revs into
2431 2437 # account. Use raw entry from index.
2432 2438 flags = entry[0] & 0xffff
2433 2439 linkrev = entry[4]
2434 2440 p1 = index[entry[5]][7]
2435 2441 p2 = index[entry[6]][7]
2436 2442 node = entry[7]
2437 2443
2438 2444 # (Possibly) reuse the delta from the revlog if allowed and
2439 2445 # the revlog chunk is a delta.
2440 2446 cachedelta = None
2441 2447 rawtext = None
2442 2448 if deltareuse == self.DELTAREUSEFULLADD:
2443 2449 text = self.revision(rev)
2444 2450 destrevlog.addrevision(text, tr, linkrev, p1, p2,
2445 2451 cachedelta=cachedelta,
2446 2452 node=node, flags=flags,
2447 2453 deltacomputer=deltacomputer)
2448 2454 else:
2449 2455 if destrevlog._lazydelta:
2450 2456 dp = self.deltaparent(rev)
2451 2457 if dp != nullrev:
2452 2458 cachedelta = (dp, bytes(self._chunk(rev)))
2453 2459
2454 2460 if not cachedelta:
2455 2461 rawtext = self.rawdata(rev)
2456 2462
2457 2463 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2458 2464 checkambig=False)
2459 2465 dfh = None
2460 2466 if not destrevlog._inline:
2461 2467 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2462 2468 try:
2463 2469 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2464 2470 p2, flags, cachedelta, ifh, dfh,
2465 2471 deltacomputer=deltacomputer)
2466 2472 finally:
2467 2473 if dfh:
2468 2474 dfh.close()
2469 2475 ifh.close()
2470 2476
2471 2477 if addrevisioncb:
2472 2478 addrevisioncb(self, rev, node)
2473 2479
2474 2480 def censorrevision(self, tr, censornode, tombstone=b''):
2475 2481 if (self.version & 0xFFFF) == REVLOGV0:
2476 2482 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2477 2483 self.version)
2478 2484
2479 2485 censorrev = self.rev(censornode)
2480 2486 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2481 2487
2482 2488 if len(tombstone) > self.rawsize(censorrev):
2483 2489 raise error.Abort(_('censor tombstone must be no longer than '
2484 2490 'censored data'))
2485 2491
2486 2492 # Rewriting the revlog in place is hard. Our strategy for censoring is
2487 2493 # to create a new revlog, copy all revisions to it, then replace the
2488 2494 # revlogs on transaction close.
2489 2495
2490 2496 newindexfile = self.indexfile + b'.tmpcensored'
2491 2497 newdatafile = self.datafile + b'.tmpcensored'
2492 2498
2493 2499 # This is a bit dangerous. We could easily have a mismatch of state.
2494 2500 newrl = revlog(self.opener, newindexfile, newdatafile,
2495 2501 censorable=True)
2496 2502 newrl.version = self.version
2497 2503 newrl._generaldelta = self._generaldelta
2498 2504 newrl._io = self._io
2499 2505
2500 2506 for rev in self.revs():
2501 2507 node = self.node(rev)
2502 2508 p1, p2 = self.parents(node)
2503 2509
2504 2510 if rev == censorrev:
2505 2511 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2506 2512 p1, p2, censornode, REVIDX_ISCENSORED)
2507 2513
2508 2514 if newrl.deltaparent(rev) != nullrev:
2509 2515 raise error.Abort(_('censored revision stored as delta; '
2510 2516 'cannot censor'),
2511 2517 hint=_('censoring of revlogs is not '
2512 2518 'fully implemented; please report '
2513 2519 'this bug'))
2514 2520 continue
2515 2521
2516 2522 if self.iscensored(rev):
2517 2523 if self.deltaparent(rev) != nullrev:
2518 2524 raise error.Abort(_('cannot censor due to censored '
2519 2525 'revision having delta stored'))
2520 2526 rawtext = self._chunk(rev)
2521 2527 else:
2522 2528 rawtext = self.rawdata(rev)
2523 2529
2524 2530 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2525 2531 self.flags(rev))
2526 2532
2527 2533 tr.addbackup(self.indexfile, location='store')
2528 2534 if not self._inline:
2529 2535 tr.addbackup(self.datafile, location='store')
2530 2536
2531 2537 self.opener.rename(newrl.indexfile, self.indexfile)
2532 2538 if not self._inline:
2533 2539 self.opener.rename(newrl.datafile, self.datafile)
2534 2540
2535 2541 self.clearcaches()
2536 2542 self._loadindex()
2537 2543
2538 2544 def verifyintegrity(self, state):
2539 2545 """Verifies the integrity of the revlog.
2540 2546
2541 2547 Yields ``revlogproblem`` instances describing problems that are
2542 2548 found.
2543 2549 """
2544 2550 dd, di = self.checksize()
2545 2551 if dd:
2546 2552 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2547 2553 if di:
2548 2554 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2549 2555
2550 2556 version = self.version & 0xFFFF
2551 2557
2552 2558 # The verifier tells us what version revlog we should be.
2553 2559 if version != state['expectedversion']:
2554 2560 yield revlogproblem(
2555 2561 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2556 2562 (self.indexfile, version, state['expectedversion']))
2557 2563
2558 2564 state['skipread'] = set()
2559 2565
2560 2566 for rev in self:
2561 2567 node = self.node(rev)
2562 2568
2563 2569 # Verify contents. 4 cases to care about:
2564 2570 #
2565 2571 # common: the most common case
2566 2572 # rename: with a rename
2567 2573 # meta: file content starts with b'\1\n', the metadata
2568 2574 # header defined in filelog.py, but without a rename
2569 2575 # ext: content stored externally
2570 2576 #
2571 2577 # More formally, their differences are shown below:
2572 2578 #
2573 2579 # | common | rename | meta | ext
2574 2580 # -------------------------------------------------------
2575 2581 # flags() | 0 | 0 | 0 | not 0
2576 2582 # renamed() | False | True | False | ?
2577 2583 # rawtext[0:2]=='\1\n'| False | True | True | ?
2578 2584 #
2579 2585 # "rawtext" means the raw text stored in revlog data, which
2580 2586 # could be retrieved by "rawdata(rev)". "text"
2581 2587 # mentioned below is "revision(rev)".
2582 2588 #
2583 2589 # There are 3 different lengths stored physically:
2584 2590 # 1. L1: rawsize, stored in revlog index
2585 2591 # 2. L2: len(rawtext), stored in revlog data
2586 2592 # 3. L3: len(text), stored in revlog data if flags==0, or
2587 2593 # possibly somewhere else if flags!=0
2588 2594 #
2589 2595 # L1 should be equal to L2. L3 could be different from them.
2590 2596 # "text" may or may not affect commit hash depending on flag
2591 2597 # processors (see flagutil.addflagprocessor).
2592 2598 #
2593 2599 # | common | rename | meta | ext
2594 2600 # -------------------------------------------------
2595 2601 # rawsize() | L1 | L1 | L1 | L1
2596 2602 # size() | L1 | L2-LM | L1(*) | L1 (?)
2597 2603 # len(rawtext) | L2 | L2 | L2 | L2
2598 2604 # len(text) | L2 | L2 | L2 | L3
2599 2605 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2600 2606 #
2601 2607 # LM: length of metadata, depending on rawtext
2602 2608 # (*): not ideal, see comment in filelog.size
2603 2609 # (?): could be "- len(meta)" if the resolved content has
2604 2610 # rename metadata
2605 2611 #
2606 2612 # Checks needed to be done:
2607 2613 # 1. length check: L1 == L2, in all cases.
2608 2614 # 2. hash check: depending on flag processor, we may need to
2609 2615 # use either "text" (external), or "rawtext" (in revlog).
2610 2616
2611 2617 try:
2612 2618 skipflags = state.get('skipflags', 0)
2613 2619 if skipflags:
2614 2620 skipflags &= self.flags(rev)
2615 2621
2616 2622 if skipflags:
2617 2623 state['skipread'].add(node)
2618 2624 else:
2619 2625 # Side-effect: read content and verify hash.
2620 2626 self.revision(node)
2621 2627
2622 2628 l1 = self.rawsize(rev)
2623 2629 l2 = len(self.rawdata(node))
2624 2630
2625 2631 if l1 != l2:
2626 2632 yield revlogproblem(
2627 2633 error=_('unpacked size is %d, %d expected') % (l2, l1),
2628 2634 node=node)
2629 2635
2630 2636 except error.CensoredNodeError:
2631 2637 if state['erroroncensored']:
2632 2638 yield revlogproblem(error=_('censored file data'),
2633 2639 node=node)
2634 2640 state['skipread'].add(node)
2635 2641 except Exception as e:
2636 2642 yield revlogproblem(
2637 2643 error=_('unpacking %s: %s') % (short(node),
2638 2644 stringutil.forcebytestr(e)),
2639 2645 node=node)
2640 2646 state['skipread'].add(node)
2641 2647
2642 2648 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2643 2649 revisionscount=False, trackedsize=False,
2644 2650 storedsize=False):
2645 2651 d = {}
2646 2652
2647 2653 if exclusivefiles:
2648 2654 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2649 2655 if not self._inline:
2650 2656 d['exclusivefiles'].append((self.opener, self.datafile))
2651 2657
2652 2658 if sharedfiles:
2653 2659 d['sharedfiles'] = []
2654 2660
2655 2661 if revisionscount:
2656 2662 d['revisionscount'] = len(self)
2657 2663
2658 2664 if trackedsize:
2659 2665 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2660 2666
2661 2667 if storedsize:
2662 2668 d['storedsize'] = sum(self.opener.stat(path).st_size
2663 2669 for path in self.files())
2664 2670
2665 2671 return d
@@ -1,34 +1,63
1 1 # sidedata.py - Logic around store extra data alongside revlog revisions
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7 """core code for "sidedata" support
8 8
9 9 The "sidedata" are stored alongside the revision without actually being part of
10 10 its content and not affecting its hash. It's main use cases is to cache
11 11 important information related to a changesets.
12 12
13 13 The current implementation is experimental and subject to changes. Do not rely
14 14 on it in production.
15 15
16 16 Sidedata are stored in the revlog itself, within the revision rawtext. They are
17 17 inserted into and removed from it using the flagprocessors mechanism. The following
18 18 format is currently used::
19 19
20 20 initial header:
21 21 <number of sidedata; 2 bytes>
22 22 sidedata (repeated N times):
23 23 <sidedata-key; 2 bytes>
24 24 <sidedata-entry-length: 4 bytes>
25 25 <sidedata-content-sha1-digest: 20 bytes>
26 26 <sidedata-content; X bytes>
27 27 normal raw text:
28 28 <all bytes remaining in the rawtext>
29 29
30 30 This is a simple and effective format. It should be enough to experiment with
31 31 the concept.
32 32 """
33 33
34 34 from __future__ import absolute_import
35
36 import hashlib
37 import struct
38
39 from .. import error
40
41 SIDEDATA_HEADER = struct.Struct('>H')
42 SIDEDATA_ENTRY = struct.Struct('>HL20s')
43
44 def sidedatareadprocessor(rl, text):
45 sidedata = {}
46 offset = 0
47 nbentry, = SIDEDATA_HEADER.unpack(text[:SIDEDATA_HEADER.size])
48 offset += SIDEDATA_HEADER.size
49 dataoffset = SIDEDATA_HEADER.size + (SIDEDATA_ENTRY.size * nbentry)
50 for i in range(nbentry):
51 nextoffset = offset + SIDEDATA_ENTRY.size
52 key, size, storeddigest = SIDEDATA_ENTRY.unpack(text[offset:nextoffset])
53 offset = nextoffset
54 # read the data associated with that entry
55 nextdataoffset = dataoffset + size
56 entrytext = text[dataoffset:nextdataoffset]
57 readdigest = hashlib.sha1(entrytext).digest()
58 if storeddigest != readdigest:
59 raise error.SidedataHashError(key, storeddigest, readdigest)
60 sidedata[key] = entrytext
61 dataoffset = nextdataoffset
62 text = text[dataoffset:]
63 return text, True, sidedata
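Only the read side lands in this change. For context, a sketch of a matching
write processor consistent with the format documented in the module docstring
(illustrative only; entries are sorted by key and digested with SHA-1, but the
real writer may differ)::

    def sidedatawriteprocessor(rl, text, sidedata):
        sidedata = sorted(sidedata.items())
        rawtext = [SIDEDATA_HEADER.pack(len(sidedata))]
        for key, value in sidedata:
            digest = hashlib.sha1(value).digest()
            rawtext.append(SIDEDATA_ENTRY.pack(key, len(value), digest))
        for key, value in sidedata:
            rawtext.append(value)
        rawtext.append(bytes(text))
        return b''.join(rawtext), False  # no extra hash validation needed

    # round trip through the reader above:
    blob, _vh = sidedatawriteprocessor(None, b'file content', {1: b'cached'})
    text, _valid, sd = sidedatareadprocessor(None, blob)
    assert text == b'file content' and sd == {1: b'cached'}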