revlog: no longer return sidedata from `_revisiondata`...
marmoute - r48177:9d9eb22b default
mercurial/changelog.py

@@ -1,627 +1,627 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import (
    constants as revlog_constants,
    flagutil,
)

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)
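
# Illustrative note (not part of the original file): the b'\n' shuffle above
# keeps an escaped backslash followed by "0" (b'\\\\0') from being read as an
# escaped NUL (b'\\0'). Doubled backslashes are first tagged with a newline,
# which is safe because real newlines were themselves escaped to b'\\n'; the
# remaining b'\\0' sequences then become NUL bytes, and the tags are stripped.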


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)
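
# Illustrative sketch (not part of the original file): `encodeextra` and
# `decodeextra` round-trip the "extra" dict through the escaped,
# NUL-separated form embedded in a changelog entry:
#
#     >>> encodeextra({b'branch': b'stable', b'close': b'1'})
#     b'branch:stable\x00close:1'
#     >>> sorted(decodeextra(b'branch:stable\x00close:1').items())
#     [(b'branch', b'stable'), (b'close', b'1')]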


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    """the changelog index must be updated last on disk, so we use this class
    to delay writes to it"""

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
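
# Illustrative sketch (not part of the original file): an `appender` makes
# buffered writes look as if they landed at the end of the real file, so the
# revlog can read back entries that are still only in memory (assuming the
# underlying file was opened positioned at its end):
#
#     buf = []
#     fp = appender(vfs, b'00changelog.i', b'a+b', buf)
#     pos = fp.tell()
#     fp.write(b'new index entry')    # appended to buf, not to disk
#     fp.seek(pos)
#     fp.read(15)                     # served from the in-memory buffer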


class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay
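
# Illustrative sketch (not part of the original file): both wrappers leave
# every file other than `target` untouched. `_divertopener` redirects `target`
# to a sibling "<target>.a" file, while `_delayopener` parks writes in `buf`
# through an `appender`:
#
#     divert = _divertopener(vfs, b'00changelog.i')
#     divert(b'00changelog.i', b'a+b')   # actually opens b'00changelog.i.a'
#
#     buf = []
#     delay = _delayopener(vfs, b'00changelog.i', buf)
#     delay(b'00changelog.i', b'a+b')    # returns an appender writing to buf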


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib()
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
    branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, cl, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra
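
        # Illustrative sketch (not part of the original file): with
        # hypothetical values, a raw entry therefore looks like:
        #
        #     0f6b91...e8a3\n                    <- manifest node (hex)
        #     Jane Doe <jane@example.org>\n      <- user
        #     1633024800 -7200 branch:stable\n   <- time, tz, encoded extra
        #     a/file.txt\n                       <- touched files, one per line
        #     b/file.txt\n
        #     \n                                 <- blank separator
        #     the commit message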

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])

    @property
    def branchinfo(self):
        extra = self.extra
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False, concurrencychecker=None):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.

        ``concurrencychecker`` will be passed to the revlog init function, see
        the documentation there.
        """
        revlog.revlog.__init__(
            self,
            opener,
            target=(revlog_constants.KIND_CHANGELOG, None),
            radix=b'00changelog',
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
            concurrencychecker=concurrencychecker,
            trypending=trypending,
        )

        if self._initempty and (self._format_version == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self._format_flags &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def _write_docket(self, tr):
        if not self._delayed:
            super(changelog, self)._write_docket(tr)

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""
        if self._docket is None and not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self._indexfile + b'.a'):
                    self._realopener.unlink(self._indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self._indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self._indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._docket is not None:
            self._write_docket(tr)
        elif self._divert:
            assert not self._delaybuf
            tmpname = self._indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self._indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self._indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._docket:
            return self._docket.write(tr, pending=True)
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self._indexfile)
            pendingfilename = self._indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self._indexfile)

        if self._divert:
            return True

        return False
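
    # Illustrative sketch (not part of the original file): the delayed-write
    # dance above, seen from a transaction's point of view:
    #
    #     cl.delayupdate(tr)    # index writes go to a buffer or ".a" file
    #     ... revisions are added ...
    #     cl._writepending(tr)  # expose "00changelog.i.a" for hooks to read
    #     cl._finalize(tr)      # rename/append the data back into place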

    def _enforceinlinesize(self, tr):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr)

    def read(self, nodeorrev):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
-        d = self._revisiondata(nodeorrev)[0]
+        d = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        copy_sd = self._copiesstorage == b'changeset-sidedata'
        c = changelogrevision(self, d, sidedata, copy_sd)
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
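
    # Illustrative note (not part of the original file): this hunk is what
    # gives the commit its name. `_revisiondata` previously returned a pair
    # whose second element was the sidedata, so callers unpacked `[0]`; it now
    # returns only the revision text, and sidedata comes from the dedicated
    # accessor:
    #
    #     text = self._revisiondata(nodeorrev)   # revision text only
    #     sidedata = self.sidedata(nodeorrev)    # fetched separately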

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
-        text = self._revisiondata(nodeorrev)[0]
+        text = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        return changelogrevision(
            self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, nodeorrev):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(nodeorrev)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]
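
    # Illustrative note (not part of the original file): in the entry layout,
    # line 0 is the manifest node, line 1 the user, line 2 the date, and every
    # following line up to the blank separator is a touched file, hence the
    # `l[3:]` slice above.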

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            if files.has_copies_info:
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        rev = self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )
        return self.node(rev)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        return self.changelogrevision(rev).branchinfo

    def _nodeduplicatecallback(self, transaction, rev):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(rev)

mercurial/revlog.py

@@ -1,3479 +1,3472 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
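
# Illustrative sketch (not part of the original file): `offset_type` packs a
# byte offset and a 16-bit flag field into the single integer stored as index
# entry field [0]; shifting and masking recover the two halves:
#
#     field = offset_type(4096, REVIDX_ISCENSORED)
#     assert field >> 16 == 4096                     # the byte offset
#     assert field & 0xFFFF == REVIDX_ISCENSORED     # the flags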
151
151
152
152
153 def _verify_revision(rl, skipflags, state, node):
153 def _verify_revision(rl, skipflags, state, node):
154 """Verify the integrity of the given revlog ``node`` while providing a hook
154 """Verify the integrity of the given revlog ``node`` while providing a hook
155 point for extensions to influence the operation."""
155 point for extensions to influence the operation."""
156 if skipflags:
156 if skipflags:
157 state[b'skipread'].add(node)
157 state[b'skipread'].add(node)
158 else:
158 else:
159 # Side-effect: read content and verify hash.
159 # Side-effect: read content and verify hash.
160 rl.revision(node)
160 rl.revision(node)
161
161
162
162
163 # True if a fast implementation for persistent-nodemap is available
163 # True if a fast implementation for persistent-nodemap is available
164 #
164 #
165 # We also consider we have a "fast" implementation in "pure" python because
165 # We also consider we have a "fast" implementation in "pure" python because
166 # people using pure don't really have performance consideration (and a
166 # people using pure don't really have performance consideration (and a
167 # wheelbarrow of other slowness source)
167 # wheelbarrow of other slowness source)
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
168 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
169 parsers, 'BaseIndexObject'
169 parsers, 'BaseIndexObject'
170 )
170 )
171
171
172
172
173 @attr.s(slots=True, frozen=True)
173 @attr.s(slots=True, frozen=True)
174 class _revisioninfo(object):
174 class _revisioninfo(object):
175 """Information about a revision that allows building its fulltext
175 """Information about a revision that allows building its fulltext
176 node: expected hash of the revision
176 node: expected hash of the revision
177 p1, p2: parent revs of the revision
177 p1, p2: parent revs of the revision
178 btext: built text cache consisting of a one-element list
178 btext: built text cache consisting of a one-element list
179 cachedelta: (baserev, uncompressed_delta) or None
179 cachedelta: (baserev, uncompressed_delta) or None
180 flags: flags associated to the revision storage
180 flags: flags associated to the revision storage
181
181
182 One of btext[0] or cachedelta must be set.
182 One of btext[0] or cachedelta must be set.
183 """
183 """
184
184
185 node = attr.ib()
185 node = attr.ib()
186 p1 = attr.ib()
186 p1 = attr.ib()
187 p2 = attr.ib()
187 p2 = attr.ib()
188 btext = attr.ib()
188 btext = attr.ib()
189 textlen = attr.ib()
189 textlen = attr.ib()
190 cachedelta = attr.ib()
190 cachedelta = attr.ib()
191 flags = attr.ib()
191 flags = attr.ib()
192
192
193
193
194 @interfaceutil.implementer(repository.irevisiondelta)
194 @interfaceutil.implementer(repository.irevisiondelta)
195 @attr.s(slots=True)
195 @attr.s(slots=True)
196 class revlogrevisiondelta(object):
196 class revlogrevisiondelta(object):
197 node = attr.ib()
197 node = attr.ib()
198 p1node = attr.ib()
198 p1node = attr.ib()
199 p2node = attr.ib()
199 p2node = attr.ib()
200 basenode = attr.ib()
200 basenode = attr.ib()
201 flags = attr.ib()
201 flags = attr.ib()
202 baserevisionsize = attr.ib()
202 baserevisionsize = attr.ib()
203 revision = attr.ib()
203 revision = attr.ib()
204 delta = attr.ib()
204 delta = attr.ib()
205 sidedata = attr.ib()
205 sidedata = attr.ib()
206 protocol_flags = attr.ib()
206 protocol_flags = attr.ib()
207 linknode = attr.ib(default=None)
207 linknode = attr.ib(default=None)
208
208
209
209
210 @interfaceutil.implementer(repository.iverifyproblem)
210 @interfaceutil.implementer(repository.iverifyproblem)
211 @attr.s(frozen=True)
211 @attr.s(frozen=True)
212 class revlogproblem(object):
212 class revlogproblem(object):
213 warning = attr.ib(default=None)
213 warning = attr.ib(default=None)
214 error = attr.ib(default=None)
214 error = attr.ib(default=None)
215 node = attr.ib(default=None)
215 node = attr.ib(default=None)
216
216
217
217
218 def parse_index_v1(data, inline):
218 def parse_index_v1(data, inline):
219 # call the C implementation to parse the index data
219 # call the C implementation to parse the index data
220 index, cache = parsers.parse_index2(data, inline)
220 index, cache = parsers.parse_index2(data, inline)
221 return index, cache
221 return index, cache
222
222
223
223
224 def parse_index_v2(data, inline):
224 def parse_index_v2(data, inline):
225 # call the C implementation to parse the index data
225 # call the C implementation to parse the index data
226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
226 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
227 return index, cache
227 return index, cache
228
228
229
229
230 def parse_index_cl_v2(data, inline):
230 def parse_index_cl_v2(data, inline):
231 # call the C implementation to parse the index data
231 # call the C implementation to parse the index data
232 assert not inline
232 assert not inline
233 from .pure.parsers import parse_index_cl_v2
233 from .pure.parsers import parse_index_cl_v2
234
234
235 index, cache = parse_index_cl_v2(data)
235 index, cache = parse_index_cl_v2(data)
236 return index, cache
236 return index, cache
237
237
238
238
239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
239 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
240
240
241 def parse_index_v1_nodemap(data, inline):
241 def parse_index_v1_nodemap(data, inline):
242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
242 index, cache = parsers.parse_index_devel_nodemap(data, inline)
243 return index, cache
243 return index, cache
244
244
245
245
246 else:
246 else:
247 parse_index_v1_nodemap = None
247 parse_index_v1_nodemap = None
248
248
249
249
250 def parse_index_v1_mixed(data, inline):
250 def parse_index_v1_mixed(data, inline):
251 index, cache = parse_index_v1(data, inline)
251 index, cache = parse_index_v1(data, inline)
252 return rustrevlog.MixedIndex(index), cache
252 return rustrevlog.MixedIndex(index), cache
253
253
254
254
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
255 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
256 # signed integer)
256 # signed integer)
257 _maxentrysize = 0x7FFFFFFF
257 _maxentrysize = 0x7FFFFFFF
258
258
259
259
260 class revlog(object):
260 class revlog(object):
261 """
261 """
262 the underlying revision storage object
262 the underlying revision storage object
263
263
264 A revlog consists of two parts, an index and the revision data.
264 A revlog consists of two parts, an index and the revision data.
265
265
266 The index is a file with a fixed record size containing
266 The index is a file with a fixed record size containing
267 information on each revision, including its nodeid (hash), the
267 information on each revision, including its nodeid (hash), the
268 nodeids of its parents, the position and offset of its data within
268 nodeids of its parents, the position and offset of its data within
269 the data file, and the revision it's based on. Finally, each entry
269 the data file, and the revision it's based on. Finally, each entry
270 contains a linkrev entry that can serve as a pointer to external
270 contains a linkrev entry that can serve as a pointer to external
271 data.
271 data.
272
272
273 The revision data itself is a linear collection of data chunks.
273 The revision data itself is a linear collection of data chunks.
274 Each chunk represents a revision and is usually represented as a
274 Each chunk represents a revision and is usually represented as a
275 delta against the previous chunk. To bound lookup time, runs of
275 delta against the previous chunk. To bound lookup time, runs of
276 deltas are limited to about 2 times the length of the original
276 deltas are limited to about 2 times the length of the original
277 version data. This makes retrieval of a version proportional to
277 version data. This makes retrieval of a version proportional to
278 its size, or O(1) relative to the number of revisions.
278 its size, or O(1) relative to the number of revisions.
279
279
280 Both pieces of the revlog are written to in an append-only
280 Both pieces of the revlog are written to in an append-only
281 fashion, which means we never need to rewrite a file to insert or
281 fashion, which means we never need to rewrite a file to insert or
282 remove data, and can use some simple techniques to avoid the need
282 remove data, and can use some simple techniques to avoid the need
283 for locking while reading.
283 for locking while reading.
284
284
285 If checkambig, indexfile is opened with checkambig=True at
285 If checkambig, indexfile is opened with checkambig=True at
286 writing, to avoid file stat ambiguity.
286 writing, to avoid file stat ambiguity.
287
287
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
288 If mmaplargeindex is True, and an mmapindexthreshold is set, the
289 index will be mmapped rather than read if it is larger than the
289 index will be mmapped rather than read if it is larger than the
290 configured threshold.
290 configured threshold.
291
291
292 If censorable is True, the revlog can have censored revisions.
292 If censorable is True, the revlog can have censored revisions.
293
293
294 If `upperboundcomp` is not None, this is the expected maximal gain from
294 If `upperboundcomp` is not None, this is the expected maximal gain from
295 compression for the data content.
295 compression for the data content.
296
296
297 `concurrencychecker` is an optional function that receives 3 arguments: a
297 `concurrencychecker` is an optional function that receives 3 arguments: a
298 file handle, a filename, and an expected position. It should check whether
298 file handle, a filename, and an expected position. It should check whether
299 the current position in the file handle is valid, and log/warn/fail (by
299 the current position in the file handle is valid, and log/warn/fail (by
300 raising).
300 raising).
301
301
302
302
303 Internal details
303 Internal details
304 ----------------
304 ----------------
305
305
306 A large part of the revlog logic deals with revisions' "index entries", tuple
306 A large part of the revlog logic deals with revisions' "index entries", tuple
307 objects that contains the same "items" whatever the revlog version.
307 objects that contains the same "items" whatever the revlog version.
308 Different versions will have different ways of storing these items (sometimes
308 Different versions will have different ways of storing these items (sometimes
    not having them at all), but the tuple will always be the same. New fields
    are usually added at the end to avoid breaking existing code that relies
    on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of the revision's data chunk.
        That value is shifted up by 16 bits; use "offset = field >> 16" to
        retrieve it.

        flags:
            A flag field that carries special information or changes the
            behavior of the revision. (see `REVIDX_*` constants for details)
            The flags only occupy the first 16 bits of this field;
            use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk.

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent.

    [6] parent 2 rev:
        Revision number of the second parent.

    [7] node id:
        The node id of the current revision.

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        Two bits that describe the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog versions 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        Two bits that describe the way the side-data chunk is compressed on
        disk. (see "COMP_MODE_*" constants for details)
    """
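
    # A minimal unpacking sketch (illustrative only, assuming the entry
    # layout documented above): given ``e = self.index[rev]``, the packed
    # first field splits as
    #
    #   offset = e[0] >> 16    # byte position of the data chunk
    #   flags = e[0] & 0xFFFF  # REVIDX_* flags
    #
    # while ``e[1]`` and ``e[2]`` give the compressed and uncompressed sizes.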

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must be reliably set by normal
        code, but that test, debug, or performance-measurement code might
        not set it to an accurate value.
        """
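        # A hedged construction sketch (the names below are assumptions for
        # illustration, not part of this module): callers usually obtain
        # revlogs from higher-level storage objects, but a direct call looks
        # roughly like
        #
        #   rl = revlog(
        #       vfs,  # hypothetical opener
        #       target=(revlog_constants.KIND_FILELOG, b'path/to/f'),
        #       radix=b'data/path/to/f',
        #   )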
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
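
    # Sketch of the header packing used above: the revlog version occupies
    # the low 16 bits and the feature flags the high bits, so a header built
    # as ``REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA`` is later split
    # by ``_loadindex`` with
    #
    #   self._format_version = header & 0xFFFF   # -> REVLOGV1
    #   self._format_flags = header & ~0xFFFF    # -> the two FLAG_* bits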

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, read with or without mmap

        If the file is missing, return the empty string."""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
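
    # Illustrative behavior of ``_get_data``: with ``mmap_threshold`` set to
    # 1 MiB, a 2 MiB index is returned as a ``util.buffer`` over an mmap,
    # while a smaller file (or ``mmap_threshold=None``) is read into a plain
    # bytes object via ``fp.read()``.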

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user-facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable deltas if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))
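
    # Illustrative distinction (assuming an LFS-like "read" flag processor):
    # for such a revision ``rawsize`` reports the stored rawtext (e.g. a
    # small pointer payload) while ``size`` reports the text after the
    # processor ran; with no content-changing flags set, the fast path above
    # makes the two equal.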

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
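        # If p1 is null, return (p2, p1) so that a non-null parent, if any,
        # always comes first.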
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # d[5] is a revision number, so compare against nullrev (mirroring
        # parentrevs above), not against the nullid node
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r
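
    # Illustrative: a revision stored as a full snapshot has a chain length
    # of 0; every delta applied on top adds 1. ``_chaininfo`` memoizes
    # (chain length, cumulative compressed delta size) pairs so later chain
    # walks can stop early at any cached revision.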

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
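
    # A worked example on a hypothetical revlog where rev 4 deltas against
    # rev 3 and rev 3 against rev 1 (a snapshot):
    #
    #   _deltachain(4)            -> ([1, 3, 4], False)
    #   _deltachain(4, stoprev=3) -> ([4], True)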

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
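
    # Worked example on a hypothetical linear history 0 <- 1 <- 2 with
    # heads=[node(2)] and common=[node(0)]: ``findmissing`` returns
    # [node(1), node(2)], since both are ancestors of a head and neither is
    # an ancestor of the common node.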

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
1309 roots = [root for root in roots if root in ancestors]
1310 # Recompute the lowest revision
1310 # Recompute the lowest revision
1311 if roots:
1311 if roots:
1312 lowestrev = min([self.rev(root) for root in roots])
1312 lowestrev = min([self.rev(root) for root in roots])
1313 else:
1313 else:
1314 # No more roots? Return empty list
1314 # No more roots? Return empty list
1315 return nonodes
1315 return nonodes
1316 else:
1316 else:
1317 # We are descending from nullid, and don't need to care about
1317 # We are descending from nullid, and don't need to care about
1318 # any other roots.
1318 # any other roots.
1319 lowestrev = nullrev
1319 lowestrev = nullrev
1320 roots = [self.nullid]
1320 roots = [self.nullid]
1321 # Transform our roots list into a set.
1321 # Transform our roots list into a set.
1322 descendants = set(roots)
1322 descendants = set(roots)
1323 # Also, keep the original roots so we can filter out roots that aren't
1323 # Also, keep the original roots so we can filter out roots that aren't
1324 # 'real' roots (i.e. are descended from other roots).
1324 # 'real' roots (i.e. are descended from other roots).
1325 roots = descendants.copy()
1325 roots = descendants.copy()
1326 # Our topologically sorted list of output nodes.
1326 # Our topologically sorted list of output nodes.
1327 orderedout = []
1327 orderedout = []
1328 # Don't start at nullid since we don't want nullid in our output list,
1328 # Don't start at nullid since we don't want nullid in our output list,
1329 # and if nullid shows up in descendants, empty parents will look like
1329 # and if nullid shows up in descendants, empty parents will look like
1330 # they're descendants.
1330 # they're descendants.
1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1331 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1332 n = self.node(r)
1332 n = self.node(r)
1333 isdescendant = False
1333 isdescendant = False
1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1334 if lowestrev == nullrev: # Everybody is a descendant of nullid
1335 isdescendant = True
1335 isdescendant = True
1336 elif n in descendants:
1336 elif n in descendants:
1337 # n is already a descendant
1337 # n is already a descendant
1338 isdescendant = True
1338 isdescendant = True
1339 # This check only needs to be done here because all the roots
1339 # This check only needs to be done here because all the roots
1340 # will start being marked as descendants before the loop.
1340 # will start being marked as descendants before the loop.
1341 if n in roots:
1341 if n in roots:
1342 # If n was a root, check if it's a 'real' root.
1342 # If n was a root, check if it's a 'real' root.
1343 p = tuple(self.parents(n))
1343 p = tuple(self.parents(n))
1344 # If any of its parents are descendants, it's not a root.
1344 # If any of its parents are descendants, it's not a root.
1345 if (p[0] in descendants) or (p[1] in descendants):
1345 if (p[0] in descendants) or (p[1] in descendants):
1346 roots.remove(n)
1346 roots.remove(n)
1347 else:
1347 else:
1348 p = tuple(self.parents(n))
1348 p = tuple(self.parents(n))
1349 # A node is a descendant if either of its parents are
1349 # A node is a descendant if either of its parents are
1350 # descendants. (We seeded the descendants set with the roots
1350 # descendants. (We seeded the descendants set with the roots
1351 # up there, remember?)
1351 # up there, remember?)
1352 if (p[0] in descendants) or (p[1] in descendants):
1352 if (p[0] in descendants) or (p[1] in descendants):
1353 descendants.add(n)
1353 descendants.add(n)
1354 isdescendant = True
1354 isdescendant = True
1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1355 if isdescendant and ((ancestors is None) or (n in ancestors)):
1356 # Only include nodes that are both descendants and ancestors.
1356 # Only include nodes that are both descendants and ancestors.
1357 orderedout.append(n)
1357 orderedout.append(n)
1358 if (ancestors is not None) and (n in heads):
1358 if (ancestors is not None) and (n in heads):
1359 # We're trying to figure out which heads are reachable
1359 # We're trying to figure out which heads are reachable
1360 # from roots.
1360 # from roots.
1361 # Mark this head as having been reached
1361 # Mark this head as having been reached
1362 heads[n] = True
1362 heads[n] = True
1363 elif ancestors is None:
1363 elif ancestors is None:
1364 # Otherwise, we're trying to discover the heads.
1364 # Otherwise, we're trying to discover the heads.
1365 # Assume this is a head because if it isn't, the next step
1365 # Assume this is a head because if it isn't, the next step
1366 # will eventually remove it.
1366 # will eventually remove it.
1367 heads[n] = True
1367 heads[n] = True
1368 # But, obviously its parents aren't.
1368 # But, obviously its parents aren't.
1369 for p in self.parents(n):
1369 for p in self.parents(n):
1370 heads.pop(p, None)
1370 heads.pop(p, None)
1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1371 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1372 roots = list(roots)
1372 roots = list(roots)
1373 assert orderedout
1373 assert orderedout
1374 assert roots
1374 assert roots
1375 assert heads
1375 assert heads
1376 return (orderedout, roots, heads)
1376 return (orderedout, roots, heads)
1377
1377
1378 def headrevs(self, revs=None):
1378 def headrevs(self, revs=None):
1379 if revs is None:
1379 if revs is None:
1380 try:
1380 try:
1381 return self.index.headrevs()
1381 return self.index.headrevs()
1382 except AttributeError:
1382 except AttributeError:
1383 return self._headrevs()
1383 return self._headrevs()
1384 if rustdagop is not None and self.index.rust_ext_compat:
1384 if rustdagop is not None and self.index.rust_ext_compat:
1385 return rustdagop.headrevs(self.index, revs)
1385 return rustdagop.headrevs(self.index, revs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1386 return dagop.headrevs(revs, self._uncheckedparentrevs)
1387
1387
1388 def computephases(self, roots):
1388 def computephases(self, roots):
1389 return self.index.computephasesmapsets(roots)
1389 return self.index.computephasesmapsets(roots)
1390
1390
1391 def _headrevs(self):
1391 def _headrevs(self):
1392 count = len(self)
1392 count = len(self)
1393 if not count:
1393 if not count:
1394 return [nullrev]
1394 return [nullrev]
1395 # we won't iterate over filtered revs, so nobody is a head at the start
1395 # we won't iterate over filtered revs, so nobody is a head at the start
1396 ishead = [0] * (count + 1)
1396 ishead = [0] * (count + 1)
1397 index = self.index
1397 index = self.index
1398 for r in self:
1398 for r in self:
1399 ishead[r] = 1 # I may be a head
1399 ishead[r] = 1 # I may be a head
1400 e = index[r]
1400 e = index[r]
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1401 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1402 return [r for r, val in enumerate(ishead) if val]
1402 return [r for r, val in enumerate(ishead) if val]
1403
1403
1404 def heads(self, start=None, stop=None):
1404 def heads(self, start=None, stop=None):
1405 """return the list of all nodes that have no children
1405 """return the list of all nodes that have no children
1406
1406
1407 if start is specified, only heads that are descendants of
1407 if start is specified, only heads that are descendants of
1408 start will be returned
1408 start will be returned
1409 if stop is specified, it will consider all the revs from stop
1409 if stop is specified, it will consider all the revs from stop
1410 as if they had no children
1410 as if they had no children
1411 """
1411 """
1412 if start is None and stop is None:
1412 if start is None and stop is None:
1413 if not len(self):
1413 if not len(self):
1414 return [self.nullid]
1414 return [self.nullid]
1415 return [self.node(r) for r in self.headrevs()]
1415 return [self.node(r) for r in self.headrevs()]
1416
1416
1417 if start is None:
1417 if start is None:
1418 start = nullrev
1418 start = nullrev
1419 else:
1419 else:
1420 start = self.rev(start)
1420 start = self.rev(start)
1421
1421
1422 stoprevs = {self.rev(n) for n in stop or []}
1422 stoprevs = {self.rev(n) for n in stop or []}
1423
1423
1424 revs = dagop.headrevssubset(
1424 revs = dagop.headrevssubset(
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1425 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1426 )
1426 )
1427
1427
1428 return [self.node(rev) for rev in revs]
1428 return [self.node(rev) for rev in revs]
1429
1429
1430 def children(self, node):
1430 def children(self, node):
1431 """find the children of a given node"""
1431 """find the children of a given node"""
1432 c = []
1432 c = []
1433 p = self.rev(node)
1433 p = self.rev(node)
1434 for r in self.revs(start=p + 1):
1434 for r in self.revs(start=p + 1):
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1435 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1436 if prevs:
1436 if prevs:
1437 for pr in prevs:
1437 for pr in prevs:
1438 if pr == p:
1438 if pr == p:
1439 c.append(self.node(r))
1439 c.append(self.node(r))
1440 elif p == nullrev:
1440 elif p == nullrev:
1441 c.append(self.node(r))
1441 c.append(self.node(r))
1442 return c
1442 return c
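# Sketch of the invariant this scan maintains (assumed names `rl`, `n`):
#   >>> all(n in rl.parents(child) for child in rl.children(n))
#   True
# i.e. children() is the inverse of parents(), computed by a forward scan
# from rev(n) + 1 since children always have larger revision numbers.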
1443
1443
1444 def commonancestorsheads(self, a, b):
1444 def commonancestorsheads(self, a, b):
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1445 """calculate all the heads of the common ancestors of nodes a and b"""
1446 a, b = self.rev(a), self.rev(b)
1446 a, b = self.rev(a), self.rev(b)
1447 ancs = self._commonancestorsheads(a, b)
1447 ancs = self._commonancestorsheads(a, b)
1448 return pycompat.maplist(self.node, ancs)
1448 return pycompat.maplist(self.node, ancs)
1449
1449
1450 def _commonancestorsheads(self, *revs):
1450 def _commonancestorsheads(self, *revs):
1451 """calculate all the heads of the common ancestors of revs"""
1451 """calculate all the heads of the common ancestors of revs"""
1452 try:
1452 try:
1453 ancs = self.index.commonancestorsheads(*revs)
1453 ancs = self.index.commonancestorsheads(*revs)
1454 except (AttributeError, OverflowError): # C implementation failed
1454 except (AttributeError, OverflowError): # C implementation failed
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1455 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1456 return ancs
1456 return ancs
1457
1457
1458 def isancestor(self, a, b):
1458 def isancestor(self, a, b):
1459 """return True if node a is an ancestor of node b
1459 """return True if node a is an ancestor of node b
1460
1460
1461 A revision is considered an ancestor of itself."""
1461 A revision is considered an ancestor of itself."""
1462 a, b = self.rev(a), self.rev(b)
1462 a, b = self.rev(a), self.rev(b)
1463 return self.isancestorrev(a, b)
1463 return self.isancestorrev(a, b)
1464
1464
1465 def isancestorrev(self, a, b):
1465 def isancestorrev(self, a, b):
1466 """return True if revision a is an ancestor of revision b
1466 """return True if revision a is an ancestor of revision b
1467
1467
1468 A revision is considered an ancestor of itself.
1468 A revision is considered an ancestor of itself.
1469
1469
1470 The implementation of this is trivial but the use of
1470 The implementation of this is trivial but the use of
1471 reachableroots is not."""
1471 reachableroots is not."""
1472 if a == nullrev:
1472 if a == nullrev:
1473 return True
1473 return True
1474 elif a == b:
1474 elif a == b:
1475 return True
1475 return True
1476 elif a > b:
1476 elif a > b:
1477 return False
1477 return False
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
1478 return bool(self.reachableroots(a, [b], [a], includepath=False))
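# Hedged note on the shortcuts above: revision numbers are topologically
# ordered, so an ancestor always has a smaller rev than its descendant.
#   >>> rl.isancestorrev(5, 2)   # always False, since 5 > 2 (assumed `rl`)
#   False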
1479
1479
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1480 def reachableroots(self, minroot, heads, roots, includepath=False):
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1481 """return (heads(::(<roots> and <roots>::<heads>)))
1482
1482
1483 If includepath is True, return (<roots>::<heads>)."""
1483 If includepath is True, return (<roots>::<heads>)."""
1484 try:
1484 try:
1485 return self.index.reachableroots2(
1485 return self.index.reachableroots2(
1486 minroot, heads, roots, includepath
1486 minroot, heads, roots, includepath
1487 )
1487 )
1488 except AttributeError:
1488 except AttributeError:
1489 return dagop._reachablerootspure(
1489 return dagop._reachablerootspure(
1490 self.parentrevs, minroot, roots, heads, includepath
1490 self.parentrevs, minroot, roots, heads, includepath
1491 )
1491 )
1492
1492
1493 def ancestor(self, a, b):
1493 def ancestor(self, a, b):
1494 """calculate the "best" common ancestor of nodes a and b"""
1494 """calculate the "best" common ancestor of nodes a and b"""
1495
1495
1496 a, b = self.rev(a), self.rev(b)
1496 a, b = self.rev(a), self.rev(b)
1497 try:
1497 try:
1498 ancs = self.index.ancestors(a, b)
1498 ancs = self.index.ancestors(a, b)
1499 except (AttributeError, OverflowError):
1499 except (AttributeError, OverflowError):
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1500 ancs = ancestor.ancestors(self.parentrevs, a, b)
1501 if ancs:
1501 if ancs:
1502 # choose a consistent winner when there's a tie
1502 # choose a consistent winner when there's a tie
1503 return min(map(self.node, ancs))
1503 return min(map(self.node, ancs))
1504 return self.nullid
1504 return self.nullid
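# The min() above is only a tie-breaker: when several common-ancestor
# heads are equally good, picking the smallest binary node value keeps
# the result deterministic across runs. A sketch with assumed names:
#   >>> min(map(rl.node, {r1, r2}))   # stable winner among tied candidates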
1505
1505
1506 def _match(self, id):
1506 def _match(self, id):
1507 if isinstance(id, int):
1507 if isinstance(id, int):
1508 # rev
1508 # rev
1509 return self.node(id)
1509 return self.node(id)
1510 if len(id) == self.nodeconstants.nodelen:
1510 if len(id) == self.nodeconstants.nodelen:
1511 # possibly a binary node
1511 # possibly a binary node
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1512 # odds of a binary node being all hex in ASCII are 1 in 10**25
1513 try:
1513 try:
1514 node = id
1514 node = id
1515 self.rev(node) # quick search the index
1515 self.rev(node) # quick search the index
1516 return node
1516 return node
1517 except error.LookupError:
1517 except error.LookupError:
1518 pass # may be partial hex id
1518 pass # may be partial hex id
1519 try:
1519 try:
1520 # str(rev)
1520 # str(rev)
1521 rev = int(id)
1521 rev = int(id)
1522 if b"%d" % rev != id:
1522 if b"%d" % rev != id:
1523 raise ValueError
1523 raise ValueError
1524 if rev < 0:
1524 if rev < 0:
1525 rev = len(self) + rev
1525 rev = len(self) + rev
1526 if rev < 0 or rev >= len(self):
1526 if rev < 0 or rev >= len(self):
1527 raise ValueError
1527 raise ValueError
1528 return self.node(rev)
1528 return self.node(rev)
1529 except (ValueError, OverflowError):
1529 except (ValueError, OverflowError):
1530 pass
1530 pass
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1531 if len(id) == 2 * self.nodeconstants.nodelen:
1532 try:
1532 try:
1533 # a full hex nodeid?
1533 # a full hex nodeid?
1534 node = bin(id)
1534 node = bin(id)
1535 self.rev(node)
1535 self.rev(node)
1536 return node
1536 return node
1537 except (TypeError, error.LookupError):
1537 except (TypeError, error.LookupError):
1538 pass
1538 pass
1539
1539
1540 def _partialmatch(self, id):
1540 def _partialmatch(self, id):
1541 # we don't care about wdirfilenodeids as they should always be full hashes
1541 # we don't care about wdirfilenodeids as they should always be full hashes
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1542 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1543 ambiguous = False
1543 ambiguous = False
1544 try:
1544 try:
1545 partial = self.index.partialmatch(id)
1545 partial = self.index.partialmatch(id)
1546 if partial and self.hasnode(partial):
1546 if partial and self.hasnode(partial):
1547 if maybewdir:
1547 if maybewdir:
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1548 # single 'ff...' match in radix tree, ambiguous with wdir
1549 ambiguous = True
1549 ambiguous = True
1550 else:
1550 else:
1551 return partial
1551 return partial
1552 elif maybewdir:
1552 elif maybewdir:
1553 # no 'ff...' match in radix tree, wdir identified
1553 # no 'ff...' match in radix tree, wdir identified
1554 raise error.WdirUnsupported
1554 raise error.WdirUnsupported
1555 else:
1555 else:
1556 return None
1556 return None
1557 except error.RevlogError:
1557 except error.RevlogError:
1558 # parsers.c radix tree lookup gave multiple matches
1558 # parsers.c radix tree lookup gave multiple matches
1559 # fast path: for unfiltered changelog, radix tree is accurate
1559 # fast path: for unfiltered changelog, radix tree is accurate
1560 if not getattr(self, 'filteredrevs', None):
1560 if not getattr(self, 'filteredrevs', None):
1561 ambiguous = True
1561 ambiguous = True
1562 # fall through to slow path that filters hidden revisions
1562 # fall through to slow path that filters hidden revisions
1563 except (AttributeError, ValueError):
1563 except (AttributeError, ValueError):
1564 # we are pure python, or key was too short to search radix tree
1564 # we are pure python, or key was too short to search radix tree
1565 pass
1565 pass
1566 if ambiguous:
1566 if ambiguous:
1567 raise error.AmbiguousPrefixLookupError(
1567 raise error.AmbiguousPrefixLookupError(
1568 id, self.display_id, _(b'ambiguous identifier')
1568 id, self.display_id, _(b'ambiguous identifier')
1569 )
1569 )
1570
1570
1571 if id in self._pcache:
1571 if id in self._pcache:
1572 return self._pcache[id]
1572 return self._pcache[id]
1573
1573
1574 if len(id) <= 40:
1574 if len(id) <= 40:
1575 try:
1575 try:
1576 # hex(node)[:...]
1576 # hex(node)[:...]
1577 l = len(id) // 2 # grab an even number of digits
1577 l = len(id) // 2 # grab an even number of digits
1578 prefix = bin(id[: l * 2])
1578 prefix = bin(id[: l * 2])
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1579 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1580 nl = [
1580 nl = [
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1581 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1582 ]
1582 ]
1583 if self.nodeconstants.nullhex.startswith(id):
1583 if self.nodeconstants.nullhex.startswith(id):
1584 nl.append(self.nullid)
1584 nl.append(self.nullid)
1585 if len(nl) > 0:
1585 if len(nl) > 0:
1586 if len(nl) == 1 and not maybewdir:
1586 if len(nl) == 1 and not maybewdir:
1587 self._pcache[id] = nl[0]
1587 self._pcache[id] = nl[0]
1588 return nl[0]
1588 return nl[0]
1589 raise error.AmbiguousPrefixLookupError(
1589 raise error.AmbiguousPrefixLookupError(
1590 id, self.display_id, _(b'ambiguous identifier')
1590 id, self.display_id, _(b'ambiguous identifier')
1591 )
1591 )
1592 if maybewdir:
1592 if maybewdir:
1593 raise error.WdirUnsupported
1593 raise error.WdirUnsupported
1594 return None
1594 return None
1595 except TypeError:
1595 except TypeError:
1596 pass
1596 pass
1597
1597
1598 def lookup(self, id):
1598 def lookup(self, id):
1599 """locate a node based on:
1599 """locate a node based on:
1600 - revision number or str(revision number)
1600 - revision number or str(revision number)
1601 - nodeid or subset of hex nodeid
1601 - nodeid or subset of hex nodeid
1602 """
1602 """
1603 n = self._match(id)
1603 n = self._match(id)
1604 if n is not None:
1604 if n is not None:
1605 return n
1605 return n
1606 n = self._partialmatch(id)
1606 n = self._partialmatch(id)
1607 if n:
1607 if n:
1608 return n
1608 return n
1609
1609
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
1610 raise error.LookupError(id, self.display_id, _(b'no match found'))
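# Usage sketch for the accepted identifier forms (assumed names):
#   >>> rl.lookup(b'0')              # revision number as a byte string
#   >>> rl.lookup(node)              # full binary nodeid
#   >>> rl.lookup(hex(node)[:6])     # unambiguous hex prefix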
1611
1611
1612 def shortest(self, node, minlength=1):
1612 def shortest(self, node, minlength=1):
1613 """Find the shortest unambiguous prefix that matches node."""
1613 """Find the shortest unambiguous prefix that matches node."""
1614
1614
1615 def isvalid(prefix):
1615 def isvalid(prefix):
1616 try:
1616 try:
1617 matchednode = self._partialmatch(prefix)
1617 matchednode = self._partialmatch(prefix)
1618 except error.AmbiguousPrefixLookupError:
1618 except error.AmbiguousPrefixLookupError:
1619 return False
1619 return False
1620 except error.WdirUnsupported:
1620 except error.WdirUnsupported:
1621 # single 'ff...' match
1621 # single 'ff...' match
1622 return True
1622 return True
1623 if matchednode is None:
1623 if matchednode is None:
1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1624 raise error.LookupError(node, self.display_id, _(b'no node'))
1625 return True
1625 return True
1626
1626
1627 def maybewdir(prefix):
1627 def maybewdir(prefix):
1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1628 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1629
1629
1630 hexnode = hex(node)
1630 hexnode = hex(node)
1631
1631
1632 def disambiguate(hexnode, minlength):
1632 def disambiguate(hexnode, minlength):
1633 """Disambiguate against wdirid."""
1633 """Disambiguate against wdirid."""
1634 for length in range(minlength, len(hexnode) + 1):
1634 for length in range(minlength, len(hexnode) + 1):
1635 prefix = hexnode[:length]
1635 prefix = hexnode[:length]
1636 if not maybewdir(prefix):
1636 if not maybewdir(prefix):
1637 return prefix
1637 return prefix
1638
1638
1639 if not getattr(self, 'filteredrevs', None):
1639 if not getattr(self, 'filteredrevs', None):
1640 try:
1640 try:
1641 length = max(self.index.shortest(node), minlength)
1641 length = max(self.index.shortest(node), minlength)
1642 return disambiguate(hexnode, length)
1642 return disambiguate(hexnode, length)
1643 except error.RevlogError:
1643 except error.RevlogError:
1644 if node != self.nodeconstants.wdirid:
1644 if node != self.nodeconstants.wdirid:
1645 raise error.LookupError(
1645 raise error.LookupError(
1646 node, self.display_id, _(b'no node')
1646 node, self.display_id, _(b'no node')
1647 )
1647 )
1648 except AttributeError:
1648 except AttributeError:
1649 # Fall through to pure code
1649 # Fall through to pure code
1650 pass
1650 pass
1651
1651
1652 if node == self.nodeconstants.wdirid:
1652 if node == self.nodeconstants.wdirid:
1653 for length in range(minlength, len(hexnode) + 1):
1653 for length in range(minlength, len(hexnode) + 1):
1654 prefix = hexnode[:length]
1654 prefix = hexnode[:length]
1655 if isvalid(prefix):
1655 if isvalid(prefix):
1656 return prefix
1656 return prefix
1657
1657
1658 for length in range(minlength, len(hexnode) + 1):
1658 for length in range(minlength, len(hexnode) + 1):
1659 prefix = hexnode[:length]
1659 prefix = hexnode[:length]
1660 if isvalid(prefix):
1660 if isvalid(prefix):
1661 return disambiguate(hexnode, length)
1661 return disambiguate(hexnode, length)
1662
1662
1663 def cmp(self, node, text):
1663 def cmp(self, node, text):
1664 """compare text with a given file revision
1664 """compare text with a given file revision
1665
1665
1666 returns True if text is different from what is stored.
1666 returns True if text is different from what is stored.
1667 """
1667 """
1668 p1, p2 = self.parents(node)
1668 p1, p2 = self.parents(node)
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
1669 return storageutil.hashrevisionsha1(text, p1, p2) != node
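# Sketch of why this is cheap (assumed names `rl`, `node`, `text`): cmp()
# never reconstructs the stored revision; it re-hashes the candidate text
# against the stored parents and compares the resulting node id.
#   >>> if rl.cmp(node, text): ...   # text differs from what is stored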
1670
1670
1671 def _cachesegment(self, offset, data):
1671 def _cachesegment(self, offset, data):
1672 """Add a segment to the revlog cache.
1672 """Add a segment to the revlog cache.
1673
1673
1674 Accepts an absolute offset and the data that is at that location.
1674 Accepts an absolute offset and the data that is at that location.
1675 """
1675 """
1676 o, d = self._chunkcache
1676 o, d = self._chunkcache
1677 # try to add to existing cache
1677 # try to add to existing cache
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1678 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1679 self._chunkcache = o, d + data
1679 self._chunkcache = o, d + data
1680 else:
1680 else:
1681 self._chunkcache = offset, data
1681 self._chunkcache = offset, data
1682
1682
1683 def _readsegment(self, offset, length, df=None):
1683 def _readsegment(self, offset, length, df=None):
1684 """Load a segment of raw data from the revlog.
1684 """Load a segment of raw data from the revlog.
1685
1685
1686 Accepts an absolute offset, length to read, and an optional existing
1686 Accepts an absolute offset, length to read, and an optional existing
1687 file handle to read from.
1687 file handle to read from.
1688
1688
1689 If an existing file handle is passed, it will be seeked and the
1689 If an existing file handle is passed, it will be seeked and the
1690 original seek position will NOT be restored.
1690 original seek position will NOT be restored.
1691
1691
1692 Returns a str or buffer of raw byte data.
1692 Returns a str or buffer of raw byte data.
1693
1693
1694 Raises if the requested number of bytes could not be read.
1694 Raises if the requested number of bytes could not be read.
1695 """
1695 """
1696 # Cache data both forward and backward around the requested
1696 # Cache data both forward and backward around the requested
1697 # data, in a fixed size window. This helps speed up operations
1697 # data, in a fixed size window. This helps speed up operations
1698 # involving reading the revlog backwards.
1698 # involving reading the revlog backwards.
1699 cachesize = self._chunkcachesize
1699 cachesize = self._chunkcachesize
1700 realoffset = offset & ~(cachesize - 1)
1700 realoffset = offset & ~(cachesize - 1)
1701 reallength = (
1701 reallength = (
1702 (offset + length + cachesize) & ~(cachesize - 1)
1702 (offset + length + cachesize) & ~(cachesize - 1)
1703 ) - realoffset
1703 ) - realoffset
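# Worked example of the alignment math (illustrative values only), with
# cachesize = 65536, offset = 5000, length = 100:
#   realoffset = 5000 & ~65535            -> 0
#   (5000 + 100 + 65536) & ~65535         -> 65536
#   reallength = 65536 - realoffset       -> 65536
# so one full cache-aligned window around the request is read and cached.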
1704 with self._datareadfp(df) as df:
1704 with self._datareadfp(df) as df:
1705 df.seek(realoffset)
1705 df.seek(realoffset)
1706 d = df.read(reallength)
1706 d = df.read(reallength)
1707
1707
1708 self._cachesegment(realoffset, d)
1708 self._cachesegment(realoffset, d)
1709 if offset != realoffset or reallength != length:
1709 if offset != realoffset or reallength != length:
1710 startoffset = offset - realoffset
1710 startoffset = offset - realoffset
1711 if len(d) - startoffset < length:
1711 if len(d) - startoffset < length:
1712 raise error.RevlogError(
1712 raise error.RevlogError(
1713 _(
1713 _(
1714 b'partial read of revlog %s; expected %d bytes from '
1714 b'partial read of revlog %s; expected %d bytes from '
1715 b'offset %d, got %d'
1715 b'offset %d, got %d'
1716 )
1716 )
1717 % (
1717 % (
1718 self._indexfile if self._inline else self._datafile,
1718 self._indexfile if self._inline else self._datafile,
1719 length,
1719 length,
1720 offset,
1720 offset,
1721 len(d) - startoffset,
1721 len(d) - startoffset,
1722 )
1722 )
1723 )
1723 )
1724
1724
1725 return util.buffer(d, startoffset, length)
1725 return util.buffer(d, startoffset, length)
1726
1726
1727 if len(d) < length:
1727 if len(d) < length:
1728 raise error.RevlogError(
1728 raise error.RevlogError(
1729 _(
1729 _(
1730 b'partial read of revlog %s; expected %d bytes from offset '
1730 b'partial read of revlog %s; expected %d bytes from offset '
1731 b'%d, got %d'
1731 b'%d, got %d'
1732 )
1732 )
1733 % (
1733 % (
1734 self._indexfile if self._inline else self._datafile,
1734 self._indexfile if self._inline else self._datafile,
1735 length,
1735 length,
1736 offset,
1736 offset,
1737 len(d),
1737 len(d),
1738 )
1738 )
1739 )
1739 )
1740
1740
1741 return d
1741 return d
1742
1742
1743 def _getsegment(self, offset, length, df=None):
1743 def _getsegment(self, offset, length, df=None):
1744 """Obtain a segment of raw data from the revlog.
1744 """Obtain a segment of raw data from the revlog.
1745
1745
1746 Accepts an absolute offset, length of bytes to obtain, and an
1746 Accepts an absolute offset, length of bytes to obtain, and an
1747 optional file handle to the already-opened revlog. If the file
1747 optional file handle to the already-opened revlog. If the file
1748 handle is used, its original seek position will not be preserved.
1748 handle is used, its original seek position will not be preserved.
1749
1749
1750 Requests for data may be returned from a cache.
1750 Requests for data may be returned from a cache.
1751
1751
1752 Returns a str or a buffer instance of raw byte data.
1752 Returns a str or a buffer instance of raw byte data.
1753 """
1753 """
1754 o, d = self._chunkcache
1754 o, d = self._chunkcache
1755 l = len(d)
1755 l = len(d)
1756
1756
1757 # is it in the cache?
1757 # is it in the cache?
1758 cachestart = offset - o
1758 cachestart = offset - o
1759 cacheend = cachestart + length
1759 cacheend = cachestart + length
1760 if cachestart >= 0 and cacheend <= l:
1760 if cachestart >= 0 and cacheend <= l:
1761 if cachestart == 0 and cacheend == l:
1761 if cachestart == 0 and cacheend == l:
1762 return d # avoid a copy
1762 return d # avoid a copy
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1763 return util.buffer(d, cachestart, cacheend - cachestart)
1764
1764
1765 return self._readsegment(offset, length, df=df)
1765 return self._readsegment(offset, length, df=df)
1766
1766
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1767 def _getsegmentforrevs(self, startrev, endrev, df=None):
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1768 """Obtain a segment of raw data corresponding to a range of revisions.
1769
1769
1770 Accepts the start and end revisions and an optional already-open
1770 Accepts the start and end revisions and an optional already-open
1771 file handle to be used for reading. If the file handle is read, its
1771 file handle to be used for reading. If the file handle is read, its
1772 seek position will not be preserved.
1772 seek position will not be preserved.
1773
1773
1774 Requests for data may be satisfied by a cache.
1774 Requests for data may be satisfied by a cache.
1775
1775
1776 Returns a 2-tuple of (offset, data) for the requested range of
1776 Returns a 2-tuple of (offset, data) for the requested range of
1777 revisions. Offset is the integer offset from the beginning of the
1777 revisions. Offset is the integer offset from the beginning of the
1778 revlog and data is a str or buffer of the raw byte data.
1778 revlog and data is a str or buffer of the raw byte data.
1779
1779
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1780 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1781 to determine where each revision's data begins and ends.
1781 to determine where each revision's data begins and ends.
1782 """
1782 """
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1783 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1784 # (functions are expensive).
1784 # (functions are expensive).
1785 index = self.index
1785 index = self.index
1786 istart = index[startrev]
1786 istart = index[startrev]
1787 start = int(istart[0] >> 16)
1787 start = int(istart[0] >> 16)
1788 if startrev == endrev:
1788 if startrev == endrev:
1789 end = start + istart[1]
1789 end = start + istart[1]
1790 else:
1790 else:
1791 iend = index[endrev]
1791 iend = index[endrev]
1792 end = int(iend[0] >> 16) + iend[1]
1792 end = int(iend[0] >> 16) + iend[1]
1793
1793
1794 if self._inline:
1794 if self._inline:
1795 start += (startrev + 1) * self.index.entry_size
1795 start += (startrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1796 end += (endrev + 1) * self.index.entry_size
1797 length = end - start
1797 length = end - start
1798
1798
1799 return start, self._getsegment(start, length, df=df)
1799 return start, self._getsegment(start, length, df=df)
1800
1800
1801 def _chunk(self, rev, df=None):
1801 def _chunk(self, rev, df=None):
1802 """Obtain a single decompressed chunk for a revision.
1802 """Obtain a single decompressed chunk for a revision.
1803
1803
1804 Accepts an integer revision and an optional already-open file handle
1804 Accepts an integer revision and an optional already-open file handle
1805 to be used for reading. If used, the seek position of the file will not
1805 to be used for reading. If used, the seek position of the file will not
1806 be preserved.
1806 be preserved.
1807
1807
1808 Returns a str holding uncompressed data for the requested revision.
1808 Returns a str holding uncompressed data for the requested revision.
1809 """
1809 """
1810 compression_mode = self.index[rev][10]
1810 compression_mode = self.index[rev][10]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1811 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1812 if compression_mode == COMP_MODE_PLAIN:
1812 if compression_mode == COMP_MODE_PLAIN:
1813 return data
1813 return data
1814 elif compression_mode == COMP_MODE_DEFAULT:
1814 elif compression_mode == COMP_MODE_DEFAULT:
1815 return self._decompressor(data)
1815 return self._decompressor(data)
1816 elif compression_mode == COMP_MODE_INLINE:
1816 elif compression_mode == COMP_MODE_INLINE:
1817 return self.decompress(data)
1817 return self.decompress(data)
1818 else:
1818 else:
1819 msg = 'unknown compression mode %d'
1819 msg = 'unknown compression mode %d'
1820 msg %= compression_mode
1820 msg %= compression_mode
1821 raise error.RevlogError(msg)
1821 raise error.RevlogError(msg)
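# Summary of the dispatch above (derived from this code, not a spec):
# COMP_MODE_PLAIN chunks are stored uncompressed, COMP_MODE_DEFAULT
# chunks use the revlog's configured default decompressor, and
# COMP_MODE_INLINE chunks carry their own compression header, which
# self.decompress() interprets per chunk.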
1822
1822
1823 def _chunks(self, revs, df=None, targetsize=None):
1823 def _chunks(self, revs, df=None, targetsize=None):
1824 """Obtain decompressed chunks for the specified revisions.
1824 """Obtain decompressed chunks for the specified revisions.
1825
1825
1826 Accepts an iterable of numeric revisions that are assumed to be in
1826 Accepts an iterable of numeric revisions that are assumed to be in
1827 ascending order. Also accepts an optional already-open file handle
1827 ascending order. Also accepts an optional already-open file handle
1828 to be used for reading. If used, the seek position of the file will
1828 to be used for reading. If used, the seek position of the file will
1829 not be preserved.
1829 not be preserved.
1830
1830
1831 This function is similar to calling ``self._chunk()`` multiple times,
1831 This function is similar to calling ``self._chunk()`` multiple times,
1832 but is faster.
1832 but is faster.
1833
1833
1834 Returns a list with decompressed data for each requested revision.
1834 Returns a list with decompressed data for each requested revision.
1835 """
1835 """
1836 if not revs:
1836 if not revs:
1837 return []
1837 return []
1838 start = self.start
1838 start = self.start
1839 length = self.length
1839 length = self.length
1840 inline = self._inline
1840 inline = self._inline
1841 iosize = self.index.entry_size
1841 iosize = self.index.entry_size
1842 buffer = util.buffer
1842 buffer = util.buffer
1843
1843
1844 l = []
1844 l = []
1845 ladd = l.append
1845 ladd = l.append
1846
1846
1847 if not self._withsparseread:
1847 if not self._withsparseread:
1848 slicedchunks = (revs,)
1848 slicedchunks = (revs,)
1849 else:
1849 else:
1850 slicedchunks = deltautil.slicechunk(
1850 slicedchunks = deltautil.slicechunk(
1851 self, revs, targetsize=targetsize
1851 self, revs, targetsize=targetsize
1852 )
1852 )
1853
1853
1854 for revschunk in slicedchunks:
1854 for revschunk in slicedchunks:
1855 firstrev = revschunk[0]
1855 firstrev = revschunk[0]
1856 # Skip trailing revisions with empty diff
1856 # Skip trailing revisions with empty diff
1857 for lastrev in revschunk[::-1]:
1857 for lastrev in revschunk[::-1]:
1858 if length(lastrev) != 0:
1858 if length(lastrev) != 0:
1859 break
1859 break
1860
1860
1861 try:
1861 try:
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1862 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1863 except OverflowError:
1863 except OverflowError:
1864 # issue4215 - we can't cache a run of chunks greater than
1864 # issue4215 - we can't cache a run of chunks greater than
1865 # 2G on Windows
1865 # 2G on Windows
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1866 return [self._chunk(rev, df=df) for rev in revschunk]
1867
1867
1868 decomp = self.decompress
1868 decomp = self.decompress
1869 # self._decompressor might be None, but will not be used in that case
1869 # self._decompressor might be None, but will not be used in that case
1870 def_decomp = self._decompressor
1870 def_decomp = self._decompressor
1871 for rev in revschunk:
1871 for rev in revschunk:
1872 chunkstart = start(rev)
1872 chunkstart = start(rev)
1873 if inline:
1873 if inline:
1874 chunkstart += (rev + 1) * iosize
1874 chunkstart += (rev + 1) * iosize
1875 chunklength = length(rev)
1875 chunklength = length(rev)
1876 comp_mode = self.index[rev][10]
1876 comp_mode = self.index[rev][10]
1877 c = buffer(data, chunkstart - offset, chunklength)
1877 c = buffer(data, chunkstart - offset, chunklength)
1878 if comp_mode == COMP_MODE_PLAIN:
1878 if comp_mode == COMP_MODE_PLAIN:
1879 ladd(c)
1879 ladd(c)
1880 elif comp_mode == COMP_MODE_INLINE:
1880 elif comp_mode == COMP_MODE_INLINE:
1881 ladd(decomp(c))
1881 ladd(decomp(c))
1882 elif comp_mode == COMP_MODE_DEFAULT:
1882 elif comp_mode == COMP_MODE_DEFAULT:
1883 ladd(def_decomp(c))
1883 ladd(def_decomp(c))
1884 else:
1884 else:
1885 msg = 'unknown compression mode %d'
1885 msg = 'unknown compression mode %d'
1886 msg %= comp_mode
1886 msg %= comp_mode
1887 raise error.RevlogError(msg)
1887 raise error.RevlogError(msg)
1888
1888
1889 return l
1889 return l
1890
1890
1891 def _chunkclear(self):
1891 def _chunkclear(self):
1892 """Clear the raw chunk cache."""
1892 """Clear the raw chunk cache."""
1893 self._chunkcache = (0, b'')
1893 self._chunkcache = (0, b'')
1894
1894
1895 def deltaparent(self, rev):
1895 def deltaparent(self, rev):
1896 """return deltaparent of the given revision"""
1896 """return deltaparent of the given revision"""
1897 base = self.index[rev][3]
1897 base = self.index[rev][3]
1898 if base == rev:
1898 if base == rev:
1899 return nullrev
1899 return nullrev
1900 elif self._generaldelta:
1900 elif self._generaldelta:
1901 return base
1901 return base
1902 else:
1902 else:
1903 return rev - 1
1903 return rev - 1
1904
1904
1905 def issnapshot(self, rev):
1905 def issnapshot(self, rev):
1906 """tells whether rev is a snapshot"""
1906 """tells whether rev is a snapshot"""
1907 if not self._sparserevlog:
1907 if not self._sparserevlog:
1908 return self.deltaparent(rev) == nullrev
1908 return self.deltaparent(rev) == nullrev
1909 elif util.safehasattr(self.index, b'issnapshot'):
1909 elif util.safehasattr(self.index, b'issnapshot'):
1910 # directly assign the method to cache the testing and access
1910 # directly assign the method to cache the testing and access
1911 self.issnapshot = self.index.issnapshot
1911 self.issnapshot = self.index.issnapshot
1912 return self.issnapshot(rev)
1912 return self.issnapshot(rev)
1913 if rev == nullrev:
1913 if rev == nullrev:
1914 return True
1914 return True
1915 entry = self.index[rev]
1915 entry = self.index[rev]
1916 base = entry[3]
1916 base = entry[3]
1917 if base == rev:
1917 if base == rev:
1918 return True
1918 return True
1919 if base == nullrev:
1919 if base == nullrev:
1920 return True
1920 return True
1921 p1 = entry[5]
1921 p1 = entry[5]
1922 p2 = entry[6]
1922 p2 = entry[6]
1923 if base == p1 or base == p2:
1923 if base == p1 or base == p2:
1924 return False
1924 return False
1925 return self.issnapshot(base)
1925 return self.issnapshot(base)
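# Hedged reading of the pure-python fallback above: a revision is a
# snapshot when its delta base is not one of its parents (the chain is
# organized for storage density, not history), and the recursion walks
# intermediate snapshots down toward a full snapshot at the chain's base.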
1926
1926
1927 def snapshotdepth(self, rev):
1927 def snapshotdepth(self, rev):
1928 """number of snapshot in the chain before this one"""
1928 """number of snapshot in the chain before this one"""
1929 if not self.issnapshot(rev):
1929 if not self.issnapshot(rev):
1930 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1930 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1931 return len(self._deltachain(rev)[0]) - 1
1931 return len(self._deltachain(rev)[0]) - 1
1932
1932
1933 def revdiff(self, rev1, rev2):
1933 def revdiff(self, rev1, rev2):
1934 """return or calculate a delta between two revisions
1934 """return or calculate a delta between two revisions
1935
1935
1936 The delta calculated is in binary form and is intended to be written to
1936 The delta calculated is in binary form and is intended to be written to
1937 revlog data directly. So this function needs raw revision data.
1937 revlog data directly. So this function needs raw revision data.
1938 """
1938 """
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1939 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1940 return bytes(self._chunk(rev2))
1940 return bytes(self._chunk(rev2))
1941
1941
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1942 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
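# Usage sketch (assumed names): when r1 is r2's stored delta parent,
# rl.revdiff(r1, r2) returns the stored chunk directly; otherwise it
# falls back to mdiff.textdiff over both raw texts, which is costlier.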
1943
1943
1944 def _processflags(self, text, flags, operation, raw=False):
1944 def _processflags(self, text, flags, operation, raw=False):
1945 """deprecated entry point to access flag processors"""
1945 """deprecated entry point to access flag processors"""
1946 msg = b'_processflag(...) use the specialized variant'
1946 msg = b'_processflag(...) use the specialized variant'
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1947 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1948 if raw:
1948 if raw:
1949 return text, flagutil.processflagsraw(self, text, flags)
1949 return text, flagutil.processflagsraw(self, text, flags)
1950 elif operation == b'read':
1950 elif operation == b'read':
1951 return flagutil.processflagsread(self, text, flags)
1951 return flagutil.processflagsread(self, text, flags)
1952 else: # write operation
1952 else: # write operation
1953 return flagutil.processflagswrite(self, text, flags)
1953 return flagutil.processflagswrite(self, text, flags)
1954
1954
1955 def revision(self, nodeorrev, _df=None, raw=False):
1955 def revision(self, nodeorrev, _df=None, raw=False):
1956 """return an uncompressed revision of a given node or revision
1956 """return an uncompressed revision of a given node or revision
1957 number.
1957 number.
1958
1958
1959 _df - an existing file handle to read from. (internal-only)
1959 _df - an existing file handle to read from. (internal-only)
1960 raw - an optional argument specifying if the revision data is to be
1960 raw - an optional argument specifying if the revision data is to be
1961 treated as raw data when applying flag transforms. 'raw' should be set
1961 treated as raw data when applying flag transforms. 'raw' should be set
1962 to True when generating changegroups or in debug commands.
1962 to True when generating changegroups or in debug commands.
1963 """
1963 """
1964 if raw:
1964 if raw:
1965 msg = (
1965 msg = (
1966 b'revlog.revision(..., raw=True) is deprecated, '
1966 b'revlog.revision(..., raw=True) is deprecated, '
1967 b'use revlog.rawdata(...)'
1967 b'use revlog.rawdata(...)'
1968 )
1968 )
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1970 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1970 return self._revisiondata(nodeorrev, _df, raw=raw)
1971
1971
1972 def sidedata(self, nodeorrev, _df=None):
1972 def sidedata(self, nodeorrev, _df=None):
1973 """a map of extra data related to the changeset but not part of the hash
1973 """a map of extra data related to the changeset but not part of the hash
1974
1974
1975 This function currently returns a dictionary. However, a more
1975 This function currently returns a dictionary. However, a more
1976 advanced mapping object will likely be used in the future for more
1976 advanced mapping object will likely be used in the future for more
1977 efficient/lazy code.
1977 efficient/lazy code.
1978 """
1978 """
1979 # deal with <nodeorrev> argument type
1979 # deal with <nodeorrev> argument type
1980 if isinstance(nodeorrev, int):
1980 if isinstance(nodeorrev, int):
1981 rev = nodeorrev
1981 rev = nodeorrev
1982 else:
1982 else:
1983 rev = self.rev(nodeorrev)
1983 rev = self.rev(nodeorrev)
1984 return self._sidedata(rev)
1984 return self._sidedata(rev)
1985
1985
1986 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1986 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1987 # deal with <nodeorrev> argument type
1987 # deal with <nodeorrev> argument type
1988 if isinstance(nodeorrev, int):
1988 if isinstance(nodeorrev, int):
1989 rev = nodeorrev
1989 rev = nodeorrev
1990 node = self.node(rev)
1990 node = self.node(rev)
1991 else:
1991 else:
1992 node = nodeorrev
1992 node = nodeorrev
1993 rev = None
1993 rev = None
1994
1994
1995 # fast path the special `nullid` rev
1995 # fast path the special `nullid` rev
1996 if node == self.nullid:
1996 if node == self.nullid:
1997 return b"", {}
1997 return b""
1998
1998
1999 # ``rawtext`` is the text as stored inside the revlog. Might be the
1999 # ``rawtext`` is the text as stored inside the revlog. Might be the
2000 # revision or might need to be processed to retrieve the revision.
2000 # revision or might need to be processed to retrieve the revision.
2001 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2001 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2002
2002
2003 if self.hassidedata:
2004 if rev is None:
2005 rev = self.rev(node)
2006 sidedata = self._sidedata(rev)
2007 else:
2008 sidedata = {}
2009
2010 if raw and validated:
2003 if raw and validated:
2011 # if we don't want to process the raw text and that raw
2004 # if we don't want to process the raw text and that raw
2012 # text is cached, we can exit early.
2005 # text is cached, we can exit early.
2013 return rawtext, sidedata
2006 return rawtext
2014 if rev is None:
2007 if rev is None:
2015 rev = self.rev(node)
2008 rev = self.rev(node)
2016 # the revlog's flag for this revision
2009 # the revlog's flag for this revision
2017 # (usually alter its state or content)
2010 # (usually alter its state or content)
2018 flags = self.flags(rev)
2011 flags = self.flags(rev)
2019
2012
2020 if validated and flags == REVIDX_DEFAULT_FLAGS:
2013 if validated and flags == REVIDX_DEFAULT_FLAGS:
2021 # no extra flags set, no flag processor runs, text = rawtext
2014 # no extra flags set, no flag processor runs, text = rawtext
2022 return rawtext, sidedata
2015 return rawtext
2023
2016
2024 if raw:
2017 if raw:
2025 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2018 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2026 text = rawtext
2019 text = rawtext
2027 else:
2020 else:
2028 r = flagutil.processflagsread(self, rawtext, flags)
2021 r = flagutil.processflagsread(self, rawtext, flags)
2029 text, validatehash = r
2022 text, validatehash = r
2030 if validatehash:
2023 if validatehash:
2031 self.checkhash(text, node, rev=rev)
2024 self.checkhash(text, node, rev=rev)
2032 if not validated:
2025 if not validated:
2033 self._revisioncache = (node, rev, rawtext)
2026 self._revisioncache = (node, rev, rawtext)
2034
2027
2035 return text, sidedata
2028 return text
2036
2029
2037 def _rawtext(self, node, rev, _df=None):
2030 def _rawtext(self, node, rev, _df=None):
2038 """return the possibly unvalidated rawtext for a revision
2031 """return the possibly unvalidated rawtext for a revision
2039
2032
2040 returns (rev, rawtext, validated)
2033 returns (rev, rawtext, validated)
2041 """
2034 """
2042
2035
2043 # revision in the cache (could be useful to apply delta)
2036 # revision in the cache (could be useful to apply delta)
2044 cachedrev = None
2037 cachedrev = None
2045 # An intermediate text to apply deltas to
2038 # An intermediate text to apply deltas to
2046 basetext = None
2039 basetext = None
2047
2040
2048 # Check if we have the entry in cache
2041 # Check if we have the entry in cache
2049 # The cache entry looks like (node, rev, rawtext)
2042 # The cache entry looks like (node, rev, rawtext)
2050 if self._revisioncache:
2043 if self._revisioncache:
2051 if self._revisioncache[0] == node:
2044 if self._revisioncache[0] == node:
2052 return (rev, self._revisioncache[2], True)
2045 return (rev, self._revisioncache[2], True)
2053 cachedrev = self._revisioncache[1]
2046 cachedrev = self._revisioncache[1]
2054
2047
2055 if rev is None:
2048 if rev is None:
2056 rev = self.rev(node)
2049 rev = self.rev(node)
2057
2050
2058 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2051 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2059 if stopped:
2052 if stopped:
2060 basetext = self._revisioncache[2]
2053 basetext = self._revisioncache[2]
2061
2054
2062 # drop cache to save memory, the caller is expected to
2055 # drop cache to save memory, the caller is expected to
2063 # update self._revisioncache after validating the text
2056 # update self._revisioncache after validating the text
2064 self._revisioncache = None
2057 self._revisioncache = None
2065
2058
2066 targetsize = None
2059 targetsize = None
2067 rawsize = self.index[rev][2]
2060 rawsize = self.index[rev][2]
2068 if 0 <= rawsize:
2061 if 0 <= rawsize:
2069 targetsize = 4 * rawsize
2062 targetsize = 4 * rawsize
2070
2063
2071 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2064 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2072 if basetext is None:
2065 if basetext is None:
2073 basetext = bytes(bins[0])
2066 basetext = bytes(bins[0])
2074 bins = bins[1:]
2067 bins = bins[1:]
2075
2068
2076 rawtext = mdiff.patches(basetext, bins)
2069 rawtext = mdiff.patches(basetext, bins)
2077 del basetext # let us have a chance to free memory early
2070 del basetext # let us have a chance to free memory early
2078 return (rev, rawtext, False)
2071 return (rev, rawtext, False)
2079
2072
2080 def _sidedata(self, rev):
2073 def _sidedata(self, rev):
2081 """Return the sidedata for a given revision number."""
2074 """Return the sidedata for a given revision number."""
2082 index_entry = self.index[rev]
2075 index_entry = self.index[rev]
2083 sidedata_offset = index_entry[8]
2076 sidedata_offset = index_entry[8]
2084 sidedata_size = index_entry[9]
2077 sidedata_size = index_entry[9]
2085
2078
2086 if self._inline:
2079 if self._inline:
2087 sidedata_offset += self.index.entry_size * (1 + rev)
2080 sidedata_offset += self.index.entry_size * (1 + rev)
2088 if sidedata_size == 0:
2081 if sidedata_size == 0:
2089 return {}
2082 return {}
2090
2083
2091 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2084 comp_segment = self._getsegment(sidedata_offset, sidedata_size)
2092 comp = self.index[rev][11]
2085 comp = self.index[rev][11]
2093 if comp == COMP_MODE_PLAIN:
2086 if comp == COMP_MODE_PLAIN:
2094 segment = comp_segment
2087 segment = comp_segment
2095 elif comp == COMP_MODE_DEFAULT:
2088 elif comp == COMP_MODE_DEFAULT:
2096 segment = self._decompressor(comp_segment)
2089 segment = self._decompressor(comp_segment)
2097 elif comp == COMP_MODE_INLINE:
2090 elif comp == COMP_MODE_INLINE:
2098 segment = self.decompress(comp_segment)
2091 segment = self.decompress(comp_segment)
2099 else:
2092 else:
2100 msg = 'unknown compression mode %d'
2093 msg = 'unknown compression mode %d'
2101 msg %= comp
2094 msg %= comp
2102 raise error.RevlogError(msg)
2095 raise error.RevlogError(msg)
2103
2096
2104 sidedata = sidedatautil.deserialize_sidedata(segment)
2097 sidedata = sidedatautil.deserialize_sidedata(segment)
2105 return sidedata
2098 return sidedata
2106
2099
2107 def rawdata(self, nodeorrev, _df=None):
2100 def rawdata(self, nodeorrev, _df=None):
2108 """return an uncompressed raw data of a given node or revision number.
2101 """return an uncompressed raw data of a given node or revision number.
2109
2102
2110 _df - an existing file handle to read from. (internal-only)
2103 _df - an existing file handle to read from. (internal-only)
2111 """
2104 """
2112 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2105 return self._revisiondata(nodeorrev, _df, raw=True)
2113
2106
2114 def hash(self, text, p1, p2):
2107 def hash(self, text, p1, p2):
2115 """Compute a node hash.
2108 """Compute a node hash.
2116
2109
2117 Available as a function so that subclasses can replace the hash
2110 Available as a function so that subclasses can replace the hash
2118 as needed.
2111 as needed.
2119 """
2112 """
2120 return storageutil.hashrevisionsha1(text, p1, p2)
2113 return storageutil.hashrevisionsha1(text, p1, p2)
2121
2114
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

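    # The conversion above boils down to a size check plus a copy; roughly
    # (simplified sketch, hypothetical `rl`/`tr` names):
    #
    #   tip = len(rl) - 1
    #   if rl._inline and rl.start(tip) + rl.length(tip) >= _maxinline:
    #       # copy every data chunk into a fresh <radix>.d file, then
    #       # rewrite <radix>.i without FLAG_INLINE_DATA
    #       ...
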
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                # closing the index file last, to avoid exposing references
                # to potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

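    # Every write path is expected to funnel through this context manager so
    # handle and transaction bookkeeping stay consistent. A rough caller
    # sketch (hypothetical `rl`/`tr` names), mirroring addrawrevision():
    #
    #   with rl._writing(tr):
    #       rl._addrevision(node, rawtext, tr, link, p1, p2, flags, None)
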
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its own
        transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

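    # Minimal caller sketch (hypothetical names; `tr` is an open transaction,
    # parents given as node ids):
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # The returned value is the new revision number; adding an already stored
    # node is a no-op that returns the existing revision.
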
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

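    # Chunk header byte -> handling in compress()/decompress() above:
    #
    #   b'x'   zlib-compressed payload (decoded via _zlibdecompress)
    #   b'\0'  stored as-is; the leading NUL byte is part of the data
    #   b'u'   stored uncompressed; the marker byte is stripped on read
    #   other  resolved through _get_decompressor() to a pluggable engine
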
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way we can
            # easily detect empty sidedata, and it will be no different from
            # sidedata we add manually.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

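    # Layout of the index entry tuple appended above, for reference:
    #
    #   [0] offset+flags   [1] compressed size   [2] uncompressed size
    #   [3] delta base     [4] linkrev           [5] p1 rev
    #   [6] p2 rev         [7] node              [8] sidedata offset
    #   [9] sidedata size  [10] data comp. mode  [11] sidedata comp. mode
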
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as
                    # raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

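    # Each item of `deltas` is the 8-tuple unpacked above:
    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata).
    # A rough caller sketch (hypothetical names; `cl.rev` maps a linknode to
    # a changelog revision):
    #
    #   touched = rl.addgroup(deltas, cl.rev, tr, addrevisioncb=cb)
    #
    # The return value is True if at least one revision was added or was
    # already present.
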
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

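    # Reading the result: a positive `dd` means trailing bytes in the data
    # file beyond what the index accounts for; a non-zero `di` means the
    # index size is not a whole multiple of the entry size (or, for inline
    # revlogs, does not match the combined index and data sizes).
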
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's existing setting is
        kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]
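            # For orientation (a reading of the index layout, not part of
            # this change): entry[0] packs the data offset and the flags,
            # with the flags in the low 16 bits, hence the 0xFFFF mask
            # above; entry[5] and entry[6] are the parent revision numbers,
            # and entry[7] is the node hash, which is why the parents are
            # mapped through index[...][7] to obtain their nodes.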

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
-                text = self._revisiondata(rev)[0]
+                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
-                    rawtext = self._revisiondata(rev)[0]
+                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

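    # A usage sketch for ``addrevisioncb`` (the names below are stand-ins,
    # not part of this change): the callback receives the source revlog,
    # the revision number, and the node after each copied revision, which
    # makes it a natural hook for progress accounting:
    #
    #     copied = [0]
    #
    #     def onadd(srcrevlog, rev, node):
    #         copied[0] += 1  # one increment per revision copied
    #
    #     source_revlog.clone(tr, dest_revlog, addrevisioncb=onadd)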
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

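    # Caller-side sketch for censoring (``fl``, ``tr``, and ``badnode`` are
    # assumed stand-ins for a censorable filelog, an open transaction, and
    # the node whose content must be destroyed):
    #
    #     fl.censorrevision(tr, badnode, tombstone=b'removed: legal request')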
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

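    # ``verifyintegrity`` is a generator, so problems surface lazily. A
    # hypothetical driver loop (only the ``state`` keys read above are
    # real; ``rl`` is a stand-in revlog instance):
    #
    #     state = {
    #         b'expectedversion': rl._format_version,
    #         b'erroroncensored': False,
    #     }
    #     for problem in rl.verifyintegrity(state):
    #         print(problem.error or problem.warning)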
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

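    # Each flag of ``storageinfo`` opts into one potentially expensive
    # statistic, so callers pay only for what they request. Sketch (``rl``
    # is an assumed revlog instance):
    #
    #     info = rl.storageinfo(revisionscount=True, storedsize=True)
    #     # -> {b'revisionscount': ..., b'storedsize': ...}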
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
                if self._docket is not None:
                    self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
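    # For reference: the ``helpers`` triple consumed above has the shape
    # returned by ``get_sidedata_helpers`` (see the test extension below),
    # i.e. ``(repo, computers, removers)``; hence the early return when
    # both ``helpers[1]`` and ``helpers[2]`` are empty.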
@@ -1,105 +1,106 b''
# ext-sidedata.py - small extension to test the sidedata logic
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import hashlib
import struct

from mercurial.node import nullrev
from mercurial import (
    extensions,
    requirements,
    revlog,
)

from mercurial.upgrade_utils import engine as upgrade_engine

from mercurial.revlogutils import constants
from mercurial.revlogutils import sidedata

def wrapaddrevision(
    orig, self, text, transaction, link, p1, p2, *args, **kwargs
):
    if kwargs.get('sidedata') is None:
        kwargs['sidedata'] = {}
    sd = kwargs['sidedata']
    ## let's store some arbitrary data just for testing
    # text length
    sd[sidedata.SD_TEST1] = struct.pack('>I', len(text))
    # and sha2 hashes
    sha256 = hashlib.sha256(text).digest()
    sd[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
    return orig(self, text, transaction, link, p1, p2, *args, **kwargs)


def wrap_revisiondata(orig, self, nodeorrev, *args, **kwargs):
-    text, sd = orig(self, nodeorrev, *args, **kwargs)
+    text = orig(self, nodeorrev, *args, **kwargs)
+    sd = self.sidedata(nodeorrev)
    if getattr(self, 'sidedatanocheck', False):
-        return text, sd
+        return text
    if self.hassidedata:
-        return text, sd
+        return text
    if nodeorrev != nullrev and nodeorrev != self.nullid:
        cat1 = sd.get(sidedata.SD_TEST1)
        if cat1 is not None and len(text) != struct.unpack('>I', cat1)[0]:
            raise RuntimeError('text size mismatch')
        expected = sd.get(sidedata.SD_TEST2)
        got = hashlib.sha256(text).digest()
        if expected is not None and got != expected:
            raise RuntimeError('sha256 mismatch')
-    return text, sd
+    return text


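# Migration note for other ``_revisiondata`` callers, mirroring the change
# above: the method now returns only the text, and sidedata comes from the
# dedicated accessor (``rl`` and ``rev`` are stand-ins):
#
#     text = rl._revisiondata(rev)   # no longer a (text, sidedata) pair
#     sidedata = rl.sidedata(rev)

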
def wrapget_sidedata_helpers(orig, srcrepo, dstrepo):
    repo, computers, removers = orig(srcrepo, dstrepo)
    assert not computers and not removers  # deal with composition later
    addedreqs = dstrepo.requirements - srcrepo.requirements

    if requirements.REVLOGV2_REQUIREMENT in addedreqs:

        def computer(repo, revlog, rev, old_sidedata):
            assert not old_sidedata  # not supported yet
            update = {}
            revlog.sidedatanocheck = True
            try:
                text = revlog.revision(rev)
            finally:
                del revlog.sidedatanocheck
            ## let's store some arbitrary data just for testing
            # text length
            update[sidedata.SD_TEST1] = struct.pack('>I', len(text))
            # and sha2 hashes
            sha256 = hashlib.sha256(text).digest()
            update[sidedata.SD_TEST2] = struct.pack('>32s', sha256)
            return update, (0, 0)

        srcrepo.register_sidedata_computer(
            constants.KIND_CHANGELOG,
            b"whatever",
            (sidedata.SD_TEST1, sidedata.SD_TEST2),
            computer,
            0,
        )
        dstrepo.register_wanted_sidedata(b"whatever")

    return sidedata.get_sidedata_helpers(srcrepo, dstrepo._wanted_sidedata)


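# The computer contract, as exercised above: a computer takes the repo, the
# revlog, the revision, and any pre-existing sidedata, and returns the new
# sidedata mapping plus a (flags_to_add, flags_to_remove) pair that
# run_sidedata_helpers folds into the entry flags. A minimal no-op skeleton
# under those assumptions (illustrative only, registered nowhere):


def noop_computer(repo, revlog, rev, old_sidedata):
    return {}, (0, 0)  # contributes nothing, touches no flags

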
def extsetup(ui):
    extensions.wrapfunction(revlog.revlog, 'addrevision', wrapaddrevision)
    extensions.wrapfunction(revlog.revlog, '_revisiondata', wrap_revisiondata)
    extensions.wrapfunction(
        upgrade_engine, 'get_sidedata_helpers', wrapget_sidedata_helpers
    )


def reposetup(ui, repo):
    # We don't register sidedata computers because we don't care within these
    # tests
    repo.register_wanted_sidedata(sidedata.SD_TEST1)
    repo.register_wanted_sidedata(sidedata.SD_TEST2)