revlog: deal with special "postfix" explicitely...
marmoute
r47916:c6b8d5d9 default
diff --git a/mercurial/changelog.py b/mercurial/changelog.py
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,625 +1,628 @@
 # changelog.py - changelog class for mercurial
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 from .i18n import _
 from .node import (
     bin,
     hex,
 )
 from .thirdparty import attr

 from . import (
     encoding,
     error,
     metadata,
     pycompat,
     revlog,
 )
 from .utils import (
     dateutil,
     stringutil,
 )
 from .revlogutils import (
     constants as revlog_constants,
     flagutil,
 )

 _defaultextra = {b'branch': b'default'}


 def _string_escape(text):
     """
     >>> from .pycompat import bytechr as chr
     >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
     >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
     >>> s
     'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
     >>> res = _string_escape(s)
     >>> s == _string_unescape(res)
     True
     """
     # subset of the string_escape codec
     text = (
         text.replace(b'\\', b'\\\\')
         .replace(b'\n', b'\\n')
         .replace(b'\r', b'\\r')
     )
     return text.replace(b'\0', b'\\0')


 def _string_unescape(text):
     if b'\\0' in text:
         # fix up \0 without getting into trouble with \\0
         text = text.replace(b'\\\\', b'\\\\\n')
         text = text.replace(b'\\0', b'\0')
         text = text.replace(b'\n', b'')
     return stringutil.unescapestr(text)


 def decodeextra(text):
     """
     >>> from .pycompat import bytechr as chr
     >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
     ...                    ).items())
     [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
     >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
     ...                                 b'baz': chr(92) + chr(0) + b'2'})
     ...                    ).items())
     [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
     """
     extra = _defaultextra.copy()
     for l in text.split(b'\0'):
         if l:
             k, v = _string_unescape(l).split(b':', 1)
             extra[k] = v
     return extra


 def encodeextra(d):
     # keys must be sorted to produce a deterministic changelog entry
     items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
     return b"\0".join(items)


 def stripdesc(desc):
     """strip trailing whitespace and leading and trailing empty lines"""
     return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


 class appender(object):
     """the changelog index must be updated last on disk, so we use this class
     to delay writes to it"""

     def __init__(self, vfs, name, mode, buf):
         self.data = buf
         fp = vfs(name, mode)
         self.fp = fp
         self.offset = fp.tell()
         self.size = vfs.fstat(fp).st_size
         self._end = self.size

     def end(self):
         return self._end

     def tell(self):
         return self.offset

     def flush(self):
         pass

     @property
     def closed(self):
         return self.fp.closed

     def close(self):
         self.fp.close()

     def seek(self, offset, whence=0):
         '''virtual file offset spans real file and data'''
         if whence == 0:
             self.offset = offset
         elif whence == 1:
             self.offset += offset
         elif whence == 2:
             self.offset = self.end() + offset
         if self.offset < self.size:
             self.fp.seek(self.offset)

     def read(self, count=-1):
         '''only trick here is reads that span real file and data'''
         ret = b""
         if self.offset < self.size:
             s = self.fp.read(count)
             ret = s
             self.offset += len(s)
             if count > 0:
                 count -= len(s)
         if count != 0:
             doff = self.offset - self.size
             self.data.insert(0, b"".join(self.data))
             del self.data[1:]
             s = self.data[0][doff : doff + count]
             self.offset += len(s)
             ret += s
         return ret

     def write(self, s):
         self.data.append(bytes(s))
         self.offset += len(s)
         self._end += len(s)

     def __enter__(self):
         self.fp.__enter__()
         return self

     def __exit__(self, *args):
         return self.fp.__exit__(*args)


 class _divertopener(object):
     def __init__(self, opener, target):
         self._opener = opener
         self._target = target

     def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
         if name != self._target:
             return self._opener(name, mode, **kwargs)
         return self._opener(name + b".a", mode, **kwargs)

     def __getattr__(self, attr):
         return getattr(self._opener, attr)


 def _delayopener(opener, target, buf):
     """build an opener that stores chunks in 'buf' instead of 'target'"""

     def _delay(name, mode=b'r', checkambig=False, **kwargs):
         if name != target:
             return opener(name, mode, **kwargs)
         assert not kwargs
         return appender(opener, name, mode, buf)

     return _delay


 @attr.s
 class _changelogrevision(object):
     # Extensions might modify _defaultextra, so let the constructor below pass
     # it in
     extra = attr.ib()
     manifest = attr.ib()
     user = attr.ib(default=b'')
     date = attr.ib(default=(0, 0))
     files = attr.ib(default=attr.Factory(list))
     filesadded = attr.ib(default=None)
     filesremoved = attr.ib(default=None)
     p1copies = attr.ib(default=None)
     p2copies = attr.ib(default=None)
     description = attr.ib(default=b'')
     branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))


 class changelogrevision(object):
     """Holds results of a parsed changelog revision.

     Changelog revisions consist of multiple pieces of data, including
     the manifest node, user, and date. This object exposes a view into
     the parsed object.
     """

     __slots__ = (
         '_offsets',
         '_text',
         '_sidedata',
         '_cpsd',
         '_changes',
     )

     def __new__(cls, cl, text, sidedata, cpsd):
         if not text:
             return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)

         self = super(changelogrevision, cls).__new__(cls)
         # We could return here and implement the following as an __init__.
         # But doing it here is equivalent and saves an extra function call.

         # format used:
         # nodeid\n        : manifest node in ascii
         # user\n          : user, no \n or \r allowed
         # time tz extra\n : date (time is int or float, timezone is int)
         #                 : extra is metadata, encoded and separated by '\0'
         #                 : older versions ignore it
         # files\n\n       : files modified by the cset, no \n or \r allowed
         # (.*)            : comment (free text, ideally utf-8)
         #
         # changelog v0 doesn't use extra

         nl1 = text.index(b'\n')
         nl2 = text.index(b'\n', nl1 + 1)
         nl3 = text.index(b'\n', nl2 + 1)

         # The list of files may be empty. Which means nl3 is the first of the
         # double newline that precedes the description.
         if text[nl3 + 1 : nl3 + 2] == b'\n':
             doublenl = nl3
         else:
             doublenl = text.index(b'\n\n', nl3 + 1)

         self._offsets = (nl1, nl2, nl3, doublenl)
         self._text = text
         self._sidedata = sidedata
         self._cpsd = cpsd
         self._changes = None

         return self

     @property
     def manifest(self):
         return bin(self._text[0 : self._offsets[0]])

     @property
     def user(self):
         off = self._offsets
         return encoding.tolocal(self._text[off[0] + 1 : off[1]])

     @property
     def _rawdate(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1 : off[2]]
         return dateextra.split(b' ', 2)[0:2]

     @property
     def _rawextra(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1 : off[2]]
         fields = dateextra.split(b' ', 2)
         if len(fields) != 3:
             return None

         return fields[2]

     @property
     def date(self):
         raw = self._rawdate
         time = float(raw[0])
         # Various tools did silly things with the timezone.
         try:
             timezone = int(raw[1])
         except ValueError:
             timezone = 0

         return time, timezone

     @property
     def extra(self):
         raw = self._rawextra
         if raw is None:
             return _defaultextra

         return decodeextra(raw)

     @property
     def changes(self):
         if self._changes is not None:
             return self._changes
         if self._cpsd:
             changes = metadata.decode_files_sidedata(self._sidedata)
         else:
             changes = metadata.ChangingFiles(
                 touched=self.files or (),
                 added=self.filesadded or (),
                 removed=self.filesremoved or (),
                 p1_copies=self.p1copies or {},
                 p2_copies=self.p2copies or {},
             )
         self._changes = changes
         return changes

     @property
     def files(self):
         if self._cpsd:
             return sorted(self.changes.touched)
         off = self._offsets
         if off[2] == off[3]:
             return []

         return self._text[off[2] + 1 : off[3]].split(b'\n')

     @property
     def filesadded(self):
         if self._cpsd:
             return self.changes.added
         else:
             rawindices = self.extra.get(b'filesadded')
         if rawindices is None:
             return None
         return metadata.decodefileindices(self.files, rawindices)

     @property
     def filesremoved(self):
         if self._cpsd:
             return self.changes.removed
         else:
             rawindices = self.extra.get(b'filesremoved')
         if rawindices is None:
             return None
         return metadata.decodefileindices(self.files, rawindices)

     @property
     def p1copies(self):
         if self._cpsd:
             return self.changes.copied_from_p1
         else:
             rawcopies = self.extra.get(b'p1copies')
         if rawcopies is None:
             return None
         return metadata.decodecopies(self.files, rawcopies)

     @property
     def p2copies(self):
         if self._cpsd:
             return self.changes.copied_from_p2
         else:
             rawcopies = self.extra.get(b'p2copies')
         if rawcopies is None:
             return None
         return metadata.decodecopies(self.files, rawcopies)

     @property
     def description(self):
         return encoding.tolocal(self._text[self._offsets[3] + 2 :])

     @property
     def branchinfo(self):
         extra = self.extra
         return encoding.tolocal(extra.get(b"branch")), b'close' in extra


 class changelog(revlog.revlog):
     def __init__(self, opener, trypending=False, concurrencychecker=None):
         """Load a changelog revlog using an opener.

         If ``trypending`` is true, we attempt to load the index from a
         ``00changelog.i.a`` file instead of the default ``00changelog.i``.
         The ``00changelog.i.a`` file contains index (and possibly inline
         revision) data for a transaction that hasn't been finalized yet.
         It exists in a separate file to facilitate readers (such as
         hooks processes) accessing data before a transaction is finalized.

         ``concurrencychecker`` will be passed to the revlog init function, see
         the documentation there.
         """
+
+        indexfile = b'00changelog.i'
         if trypending and opener.exists(b'00changelog.i.a'):
-            indexfile = b'00changelog.i.a'
+            postfix = b'a'
         else:
-            indexfile = b'00changelog.i'
+            postfix = None

         datafile = b'00changelog.d'
         revlog.revlog.__init__(
             self,
             opener,
             target=(revlog_constants.KIND_CHANGELOG, None),
+            postfix=postfix,
             indexfile=indexfile,
             datafile=datafile,
             checkambig=True,
             mmaplargeindex=True,
             persistentnodemap=opener.options.get(b'persistent-nodemap', False),
             concurrencychecker=concurrencychecker,
         )

         if self._initempty and (self._format_version == revlog.REVLOGV1):
             # changelogs don't benefit from generaldelta.

             self._format_flags &= ~revlog.FLAG_GENERALDELTA
             self._generaldelta = False

         # Delta chains for changelogs tend to be very small because entries
         # tend to be small and don't delta well with each. So disable delta
         # chains.
         self._storedeltachains = False

         self._realopener = opener
         self._delayed = False
         self._delaybuf = None
         self._divert = False
         self._filteredrevs = frozenset()
         self._filteredrevs_hashcache = {}
         self._copiesstorage = opener.options.get(b'copies-storage')

     @property
     def filteredrevs(self):
         return self._filteredrevs

     @filteredrevs.setter
     def filteredrevs(self, val):
         # Ensure all updates go through this function
         assert isinstance(val, frozenset)
         self._filteredrevs = val
         self._filteredrevs_hashcache = {}

     def delayupdate(self, tr):
         """delay visibility of index updates to other readers"""

         if not self._delayed:
             if len(self) == 0:
                 self._divert = True
                 if self._realopener.exists(self.indexfile + b'.a'):
                     self._realopener.unlink(self.indexfile + b'.a')
                 self.opener = _divertopener(self._realopener, self.indexfile)
             else:
                 self._delaybuf = []
                 self.opener = _delayopener(
                     self._realopener, self.indexfile, self._delaybuf
                 )
         self._delayed = True
         tr.addpending(b'cl-%i' % id(self), self._writepending)
         tr.addfinalize(b'cl-%i' % id(self), self._finalize)

     def _finalize(self, tr):
         """finalize index updates"""
         self._delayed = False
         self.opener = self._realopener
         # move redirected index data back into place
         if self._divert:
             assert not self._delaybuf
             tmpname = self.indexfile + b".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
             self.opener.rename(tmpname, self.indexfile, checkambig=True)
         elif self._delaybuf:
             fp = self.opener(self.indexfile, b'a', checkambig=True)
             fp.write(b"".join(self._delaybuf))
             fp.close()
             self._delaybuf = None
         self._divert = False
         # split when we're done
         self._enforceinlinesize(tr)

     def _writepending(self, tr):
         """create a file containing the unfinalized state for
         pretxnchangegroup"""
         if self._delaybuf:
             # make a temporary copy of the index
             fp1 = self._realopener(self.indexfile)
             pendingfilename = self.indexfile + b".a"
             # register as a temp file to ensure cleanup on failure
             tr.registertmp(pendingfilename)
             # write existing data
             fp2 = self._realopener(pendingfilename, b"w")
             fp2.write(fp1.read())
             # add pending data
             fp2.write(b"".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
             self._delaybuf = None
             self._divert = True
             self.opener = _divertopener(self._realopener, self.indexfile)

         if self._divert:
             return True

         return False

     def _enforceinlinesize(self, tr, fp=None):
         if not self._delayed:
             revlog.revlog._enforceinlinesize(self, tr, fp)

     def read(self, nodeorrev):
         """Obtain data from a parsed changelog revision.

         Returns a 6-tuple of:

            - manifest node in binary
            - author/user as a localstr
            - date as a 2-tuple of (time, timezone)
            - list of files
            - commit message as a localstr
            - dict of extra metadata

         Unless you need to access all fields, consider calling
         ``changelogrevision`` instead, as it is faster for partial object
         access.
         """
         d, s = self._revisiondata(nodeorrev)
         c = changelogrevision(
             self, d, s, self._copiesstorage == b'changeset-sidedata'
         )
         return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

     def changelogrevision(self, nodeorrev):
         """Obtain a ``changelogrevision`` for a node or revision."""
         text, sidedata = self._revisiondata(nodeorrev)
         return changelogrevision(
             self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
         )

     def readfiles(self, nodeorrev):
         """
         short version of read that only returns the files modified by the cset
         """
         text = self.revision(nodeorrev)
         if not text:
             return []
         last = text.index(b"\n\n")
         l = text[:last].split(b'\n')
         return l[3:]

     def add(
         self,
         manifest,
         files,
         desc,
         transaction,
         p1,
         p2,
         user,
         date=None,
         extra=None,
     ):
         # Convert to UTF-8 encoded bytestrings as the very first
         # thing: calling any method on a localstr object will turn it
         # into a str object and the cached UTF-8 string is thus lost.
         user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

         user = user.strip()
         # An empty username or a username with a "\n" will make the
         # revision text contain two "\n\n" sequences -> corrupt
         # repository since read cannot unpack the revision.
         if not user:
             raise error.StorageError(_(b"empty username"))
         if b"\n" in user:
             raise error.StorageError(
                 _(b"username %r contains a newline") % pycompat.bytestr(user)
             )

         desc = stripdesc(desc)

         if date:
             parseddate = b"%d %d" % dateutil.parsedate(date)
         else:
             parseddate = b"%d %d" % dateutil.makedate()
         if extra:
             branch = extra.get(b"branch")
             if branch in (b"default", b""):
                 del extra[b"branch"]
             elif branch in (b".", b"null", b"tip"):
                 raise error.StorageError(
                     _(b'the name \'%s\' is reserved') % branch
                 )
         sortedfiles = sorted(files.touched)
         flags = 0
         sidedata = None
         if self._copiesstorage == b'changeset-sidedata':
             if files.has_copies_info:
                 flags |= flagutil.REVIDX_HASCOPIESINFO
             sidedata = metadata.encode_files_sidedata(files)

         if extra:
             extra = encodeextra(extra)
             parseddate = b"%s %s" % (parseddate, extra)
         l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
         text = b"\n".join(l)
         rev = self.addrevision(
             text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
         )
         return self.node(rev)

     def branchinfo(self, rev):
         """return the branch name and open/close state of a revision

         This function exists because creating a changectx object
         just to access this is costly."""
         return self.changelogrevision(rev).branchinfo

     def _nodeduplicatecallback(self, transaction, rev):
         # keep track of revisions that got "re-added", eg: unbunde of know rev.
         #
         # We track them in a list to preserve their order from the source bundle
         duplicates = transaction.changes.setdefault(b'revduplicates', [])
         duplicates.append(rev)
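
Note how the two hunks fit together: ``_writepending`` above still materializes the pending index as ``self.indexfile + b".a"``, and the constructor now reaches that same file by handing ``postfix=b'a'`` down to ``revlog.__init__`` instead of hard-coding ``00changelog.i.a``. Below is a small sanity-check sketch of that equivalence (hypothetical test code, not part of the commit; it assumes, per the revlog.py hunk that follows, that an explicitly passed datafile is left untouched):

    def old_changelog_files(trypending, pending_exists):
        # Pre-patch behaviour: the pending index name is hard-coded.
        if trypending and pending_exists:
            return b'00changelog.i.a', b'00changelog.d'
        return b'00changelog.i', b'00changelog.d'


    def new_changelog_files(trypending, pending_exists):
        # Post-patch behaviour: pick a postfix and let revlog.__init__
        # append it to the index name; the explicit datafile stays as-is.
        postfix = b'a' if (trypending and pending_exists) else None
        indexfile, datafile = b'00changelog.i', b'00changelog.d'
        if postfix is not None:
            indexfile = b'%s.%s' % (indexfile, postfix)
        return indexfile, datafile


    for args in [(False, False), (False, True), (True, False), (True, True)]:
        assert old_changelog_files(*args) == new_changelog_files(*args)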
diff --git a/mercurial/revlog.py b/mercurial/revlog.py
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,3162 +1,3171 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 from __future__ import absolute_import

 import binascii
 import collections
 import contextlib
 import errno
 import io
 import os
 import struct
 import zlib

 # import stuff from node for others to import from revlog
 from .node import (
     bin,
     hex,
     nullrev,
     sha1nodeconstants,
     short,
     wdirrev,
 )
 from .i18n import _
 from .pycompat import getattr
 from .revlogutils.constants import (
     ALL_KINDS,
     FLAG_GENERALDELTA,
     FLAG_INLINE_DATA,
     INDEX_HEADER,
     REVLOGV0,
     REVLOGV1,
     REVLOGV1_FLAGS,
     REVLOGV2,
     REVLOGV2_FLAGS,
     REVLOG_DEFAULT_FLAGS,
     REVLOG_DEFAULT_FORMAT,
     REVLOG_DEFAULT_VERSION,
 )
 from .revlogutils.flagutil import (
     REVIDX_DEFAULT_FLAGS,
     REVIDX_ELLIPSIS,
     REVIDX_EXTSTORED,
     REVIDX_FLAGS_ORDER,
     REVIDX_HASCOPIESINFO,
     REVIDX_ISCENSORED,
     REVIDX_RAWTEXT_CHANGING_FLAGS,
 )
 from .thirdparty import attr
 from . import (
     ancestor,
     dagop,
     error,
     mdiff,
     policy,
     pycompat,
     templatefilters,
     util,
 )
 from .interfaces import (
     repository,
     util as interfaceutil,
 )
 from .revlogutils import (
     deltas as deltautil,
     flagutil,
     nodemap as nodemaputil,
     revlogv0,
     sidedata as sidedatautil,
 )
 from .utils import (
     storageutil,
     stringutil,
 )

 # blanked usage of all the name to prevent pyflakes constraints
 # We need these name available in the module for extensions.

 REVLOGV0
 REVLOGV1
 REVLOGV2
 FLAG_INLINE_DATA
 FLAG_GENERALDELTA
 REVLOG_DEFAULT_FLAGS
 REVLOG_DEFAULT_FORMAT
 REVLOG_DEFAULT_VERSION
 REVLOGV1_FLAGS
 REVLOGV2_FLAGS
 REVIDX_ISCENSORED
 REVIDX_ELLIPSIS
 REVIDX_HASCOPIESINFO
 REVIDX_EXTSTORED
 REVIDX_DEFAULT_FLAGS
 REVIDX_FLAGS_ORDER
 REVIDX_RAWTEXT_CHANGING_FLAGS

 parsers = policy.importmod('parsers')
 rustancestor = policy.importrust('ancestor')
 rustdagop = policy.importrust('dagop')
 rustrevlog = policy.importrust('revlog')

 # Aliased for performance.
 _zlibdecompress = zlib.decompress

 # max size of revlog with inline data
 _maxinline = 131072
 _chunksize = 1048576

 # Flag processors for REVIDX_ELLIPSIS.
 def ellipsisreadprocessor(rl, text):
     return text, False


 def ellipsiswriteprocessor(rl, text):
     return text, False


 def ellipsisrawprocessor(rl, text):
     return False


 ellipsisprocessor = (
     ellipsisreadprocessor,
     ellipsiswriteprocessor,
     ellipsisrawprocessor,
 )


 def offset_type(offset, type):
     if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
         raise ValueError(b'unknown revlog index flags')
     return int(int(offset) << 16 | type)


 def _verify_revision(rl, skipflags, state, node):
     """Verify the integrity of the given revlog ``node`` while providing a hook
     point for extensions to influence the operation."""
     if skipflags:
         state[b'skipread'].add(node)
     else:
         # Side-effect: read content and verify hash.
         rl.revision(node)


 # True if a fast implementation for persistent-nodemap is available
 #
 # We also consider we have a "fast" implementation in "pure" python because
 # people using pure don't really have performance consideration (and a
 # wheelbarrow of other slowness source)
 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
     parsers, 'BaseIndexObject'
 )


 @attr.s(slots=True, frozen=True)
 class _revisioninfo(object):
     """Information about a revision that allows building its fulltext
     node:       expected hash of the revision
     p1, p2:     parent revs of the revision
     btext:      built text cache consisting of a one-element list
     cachedelta: (baserev, uncompressed_delta) or None
     flags:      flags associated to the revision storage

     One of btext[0] or cachedelta must be set.
     """

     node = attr.ib()
     p1 = attr.ib()
     p2 = attr.ib()
     btext = attr.ib()
     textlen = attr.ib()
     cachedelta = attr.ib()
     flags = attr.ib()


 @interfaceutil.implementer(repository.irevisiondelta)
 @attr.s(slots=True)
 class revlogrevisiondelta(object):
     node = attr.ib()
     p1node = attr.ib()
     p2node = attr.ib()
     basenode = attr.ib()
     flags = attr.ib()
     baserevisionsize = attr.ib()
     revision = attr.ib()
     delta = attr.ib()
     sidedata = attr.ib()
     protocol_flags = attr.ib()
     linknode = attr.ib(default=None)


 @interfaceutil.implementer(repository.iverifyproblem)
 @attr.s(frozen=True)
 class revlogproblem(object):
     warning = attr.ib(default=None)
     error = attr.ib(default=None)
     node = attr.ib(default=None)


 def parse_index_v1(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline)
     return index, cache


 def parse_index_v2(data, inline):
     # call the C implementation to parse the index data
     index, cache = parsers.parse_index2(data, inline, revlogv2=True)
     return index, cache


 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

     def parse_index_v1_nodemap(data, inline):
         index, cache = parsers.parse_index_devel_nodemap(data, inline)
         return index, cache


 else:
     parse_index_v1_nodemap = None


 def parse_index_v1_mixed(data, inline):
     index, cache = parse_index_v1(data, inline)
     return rustrevlog.MixedIndex(index), cache


 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
 # signed integer)
 _maxentrysize = 0x7FFFFFFF


 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.

     If checkambig, indexfile is opened with checkambig=True at
     writing, to avoid file stat ambiguity.

     If mmaplargeindex is True, and an mmapindexthreshold is set, the
     index will be mmapped rather than read if it is larger than the
     configured threshold.

     If censorable is True, the revlog can have censored revisions.

     If `upperboundcomp` is not None, this is the expected maximal gain from
     compression for the data content.

     `concurrencychecker` is an optional function that receives 3 arguments: a
     file handle, a filename, and an expected position. It should check whether
     the current position in the file handle is valid, and log/warn/fail (by
     raising).
     """

     _flagserrorclass = error.RevlogError

     def __init__(
         self,
         opener,
         target,
+        postfix=None,
         indexfile=None,
         datafile=None,
         checkambig=False,
         mmaplargeindex=False,
         censorable=False,
         upperboundcomp=None,
         persistentnodemap=False,
         concurrencychecker=None,
     ):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.

         `target`: a (KIND, ID) tuple that identify the content stored in
         this revlog. It help the rest of the code to understand what the revlog
         is about without having to resort to heuristic and index filename
         analysis. Note: that this must be reliably be set by normal code, but
         that test, debug, or performance measurement code might not set this to
         accurate value.
         """
         self.upperboundcomp = upperboundcomp
+        if not indexfile.endswith(b'.i'):
+            raise error.ProgrammingError(
+                b"revlog's indexfile should end with `.i`"
+            )
+        if datafile is None:
+            datafile = indexfile[:-2] + b".d"
+            if postfix is not None:
+                datafile = b'%s.%s' % (datafile, postfix)
+        if postfix is not None:
+            indexfile = b'%s.%s' % (indexfile, postfix)
         self.indexfile = indexfile
-        self.datafile = datafile or (indexfile[:-2] + b".d")
+        self.datafile = datafile
         self.nodemap_file = None
+        self.postfix = postfix
318 if persistentnodemap:
330 if persistentnodemap:
319 self.nodemap_file = nodemaputil.get_nodemap_file(
331 self.nodemap_file = nodemaputil.get_nodemap_file(
320 opener, self.indexfile
332 opener, self.indexfile
321 )
333 )
322
334
323 self.opener = opener
335 self.opener = opener
324 assert target[0] in ALL_KINDS
336 assert target[0] in ALL_KINDS
325 assert len(target) == 2
337 assert len(target) == 2
326 self.target = target
338 self.target = target
327 # When True, indexfile is opened with checkambig=True at writing, to
339 # When True, indexfile is opened with checkambig=True at writing, to
328 # avoid file stat ambiguity.
340 # avoid file stat ambiguity.
329 self._checkambig = checkambig
341 self._checkambig = checkambig
330 self._mmaplargeindex = mmaplargeindex
342 self._mmaplargeindex = mmaplargeindex
331 self._censorable = censorable
343 self._censorable = censorable
332 # 3-tuple of (node, rev, text) for a raw revision.
344 # 3-tuple of (node, rev, text) for a raw revision.
333 self._revisioncache = None
345 self._revisioncache = None
334 # Maps rev to chain base rev.
346 # Maps rev to chain base rev.
335 self._chainbasecache = util.lrucachedict(100)
347 self._chainbasecache = util.lrucachedict(100)
336 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
348 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
337 self._chunkcache = (0, b'')
349 self._chunkcache = (0, b'')
338 # How much data to read and cache into the raw revlog data cache.
350 # How much data to read and cache into the raw revlog data cache.
339 self._chunkcachesize = 65536
351 self._chunkcachesize = 65536
340 self._maxchainlen = None
352 self._maxchainlen = None
341 self._deltabothparents = True
353 self._deltabothparents = True
342 self.index = None
354 self.index = None
343 self._nodemap_docket = None
355 self._nodemap_docket = None
344 # Mapping of partial identifiers to full nodes.
356 # Mapping of partial identifiers to full nodes.
345 self._pcache = {}
357 self._pcache = {}
346 # Mapping of revision integer to full node.
358 # Mapping of revision integer to full node.
347 self._compengine = b'zlib'
359 self._compengine = b'zlib'
348 self._compengineopts = {}
360 self._compengineopts = {}
349 self._maxdeltachainspan = -1
361 self._maxdeltachainspan = -1
350 self._withsparseread = False
362 self._withsparseread = False
351 self._sparserevlog = False
363 self._sparserevlog = False
352 self._srdensitythreshold = 0.50
364 self._srdensitythreshold = 0.50
353 self._srmingapsize = 262144
365 self._srmingapsize = 262144
354
366
355 # Make copy of flag processors so each revlog instance can support
367 # Make copy of flag processors so each revlog instance can support
356 # custom flags.
368 # custom flags.
357 self._flagprocessors = dict(flagutil.flagprocessors)
369 self._flagprocessors = dict(flagutil.flagprocessors)
358
370
359 # 2-tuple of file handles being used for active writing.
371 # 2-tuple of file handles being used for active writing.
360 self._writinghandles = None
372 self._writinghandles = None
361
373
362 self._loadindex()
374 self._loadindex()
363
375
364 self._concurrencychecker = concurrencychecker
376 self._concurrencychecker = concurrencychecker
365
377
366 def _init_opts(self):
378 def _init_opts(self):
367 """process options (from above/config) to setup associated default revlog mode
379 """process options (from above/config) to setup associated default revlog mode
368
380
369 These values might be affected when actually reading on-disk information.
381 These values might be affected when actually reading on-disk information.
370
382
371 The relevant values are returned for use in _loadindex().
383 The relevant values are returned for use in _loadindex().
372
384
373 * newversionflags:
385 * newversionflags:
374 version header to use if we need to create a new revlog
386 version header to use if we need to create a new revlog
375
387
376 * mmapindexthreshold:
388 * mmapindexthreshold:
377 minimal index size at which to start using mmap
389 minimal index size at which to start using mmap
378
390
379 * force_nodemap:
391 * force_nodemap:
380 force the usage of a "development" version of the nodemap code
392 force the usage of a "development" version of the nodemap code
381 """
393 """
382 mmapindexthreshold = None
394 mmapindexthreshold = None
383 opts = self.opener.options
395 opts = self.opener.options
384
396
385 if b'revlogv2' in opts:
397 if b'revlogv2' in opts:
386 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
398 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
387 elif b'revlogv1' in opts:
399 elif b'revlogv1' in opts:
388 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
400 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
389 if b'generaldelta' in opts:
401 if b'generaldelta' in opts:
390 newversionflags |= FLAG_GENERALDELTA
402 newversionflags |= FLAG_GENERALDELTA
391 elif b'revlogv0' in self.opener.options:
403 elif b'revlogv0' in self.opener.options:
392 newversionflags = REVLOGV0
404 newversionflags = REVLOGV0
393 else:
405 else:
394 newversionflags = REVLOG_DEFAULT_VERSION
406 newversionflags = REVLOG_DEFAULT_VERSION
395
407
396 if b'chunkcachesize' in opts:
408 if b'chunkcachesize' in opts:
397 self._chunkcachesize = opts[b'chunkcachesize']
409 self._chunkcachesize = opts[b'chunkcachesize']
398 if b'maxchainlen' in opts:
410 if b'maxchainlen' in opts:
399 self._maxchainlen = opts[b'maxchainlen']
411 self._maxchainlen = opts[b'maxchainlen']
400 if b'deltabothparents' in opts:
412 if b'deltabothparents' in opts:
401 self._deltabothparents = opts[b'deltabothparents']
413 self._deltabothparents = opts[b'deltabothparents']
402 self._lazydelta = bool(opts.get(b'lazydelta', True))
414 self._lazydelta = bool(opts.get(b'lazydelta', True))
403 self._lazydeltabase = False
415 self._lazydeltabase = False
404 if self._lazydelta:
416 if self._lazydelta:
405 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
417 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
406 if b'compengine' in opts:
418 if b'compengine' in opts:
407 self._compengine = opts[b'compengine']
419 self._compengine = opts[b'compengine']
408 if b'zlib.level' in opts:
420 if b'zlib.level' in opts:
409 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
421 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
410 if b'zstd.level' in opts:
422 if b'zstd.level' in opts:
411 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
423 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
412 if b'maxdeltachainspan' in opts:
424 if b'maxdeltachainspan' in opts:
413 self._maxdeltachainspan = opts[b'maxdeltachainspan']
425 self._maxdeltachainspan = opts[b'maxdeltachainspan']
414 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
426 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
415 mmapindexthreshold = opts[b'mmapindexthreshold']
427 mmapindexthreshold = opts[b'mmapindexthreshold']
416 self.hassidedata = bool(opts.get(b'side-data', False))
428 self.hassidedata = bool(opts.get(b'side-data', False))
417 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
429 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
418 withsparseread = bool(opts.get(b'with-sparse-read', False))
430 withsparseread = bool(opts.get(b'with-sparse-read', False))
419 # sparse-revlog forces sparse-read
431 # sparse-revlog forces sparse-read
420 self._withsparseread = self._sparserevlog or withsparseread
432 self._withsparseread = self._sparserevlog or withsparseread
421 if b'sparse-read-density-threshold' in opts:
433 if b'sparse-read-density-threshold' in opts:
422 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
434 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
423 if b'sparse-read-min-gap-size' in opts:
435 if b'sparse-read-min-gap-size' in opts:
424 self._srmingapsize = opts[b'sparse-read-min-gap-size']
436 self._srmingapsize = opts[b'sparse-read-min-gap-size']
425 if opts.get(b'enableellipsis'):
437 if opts.get(b'enableellipsis'):
426 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
438 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
427
439
428 # revlog v0 doesn't have flag processors
440 # revlog v0 doesn't have flag processors
429 for flag, processor in pycompat.iteritems(
441 for flag, processor in pycompat.iteritems(
430 opts.get(b'flagprocessors', {})
442 opts.get(b'flagprocessors', {})
431 ):
443 ):
432 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
444 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
433
445
434 if self._chunkcachesize <= 0:
446 if self._chunkcachesize <= 0:
435 raise error.RevlogError(
447 raise error.RevlogError(
436 _(b'revlog chunk cache size %r is not greater than 0')
448 _(b'revlog chunk cache size %r is not greater than 0')
437 % self._chunkcachesize
449 % self._chunkcachesize
438 )
450 )
439 elif self._chunkcachesize & (self._chunkcachesize - 1):
451 elif self._chunkcachesize & (self._chunkcachesize - 1):
440 raise error.RevlogError(
452 raise error.RevlogError(
441 _(b'revlog chunk cache size %r is not a power of 2')
453 _(b'revlog chunk cache size %r is not a power of 2')
442 % self._chunkcachesize
454 % self._chunkcachesize
443 )
455 )
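# The elif branch relies on a standard bit trick: for n > 0,
# n & (n - 1) clears the lowest set bit, so the result is 0 exactly
# when n is a power of two. For example:
#   65536 & 65535 == 0      # accepted (2 ** 16)
#   65537 & 65536 == 65536  # rejected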
444 force_nodemap = opts.get(b'devel-force-nodemap', False)
456 force_nodemap = opts.get(b'devel-force-nodemap', False)
445 return newversionflags, mmapindexthreshold, force_nodemap
457 return newversionflags, mmapindexthreshold, force_nodemap
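# Hypothetical illustration of the triple returned above: with
# opener.options = {b'revlogv1': True, b'generaldelta': True},
# newversionflags == REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA,
# mmapindexthreshold stays None (mmap was not configured), and
# force_nodemap is False unless b'devel-force-nodemap' is set.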
446
458
447 def _loadindex(self):
459 def _loadindex(self):
448
460
449 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
461 newversionflags, mmapindexthreshold, force_nodemap = self._init_opts()
450 indexdata = b''
462 indexdata = b''
451 self._initempty = True
463 self._initempty = True
452 try:
464 try:
453 with self._indexfp() as f:
465 with self._indexfp() as f:
454 if (
466 if (
455 mmapindexthreshold is not None
467 mmapindexthreshold is not None
456 and self.opener.fstat(f).st_size >= mmapindexthreshold
468 and self.opener.fstat(f).st_size >= mmapindexthreshold
457 ):
469 ):
458 # TODO: should call .close() to release resources without
470 # TODO: should call .close() to release resources without
459 # relying on Python GC
471 # relying on Python GC
460 indexdata = util.buffer(util.mmapread(f))
472 indexdata = util.buffer(util.mmapread(f))
461 else:
473 else:
462 indexdata = f.read()
474 indexdata = f.read()
463 if len(indexdata) > 0:
475 if len(indexdata) > 0:
464 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
476 versionflags = INDEX_HEADER.unpack(indexdata[:4])[0]
465 self._initempty = False
477 self._initempty = False
466 else:
478 else:
467 versionflags = newversionflags
479 versionflags = newversionflags
468 except IOError as inst:
480 except IOError as inst:
469 if inst.errno != errno.ENOENT:
481 if inst.errno != errno.ENOENT:
470 raise
482 raise
471
483
472 versionflags = newversionflags
484 versionflags = newversionflags
473
485
474 flags = self._format_flags = versionflags & ~0xFFFF
486 flags = self._format_flags = versionflags & ~0xFFFF
475 fmt = self._format_version = versionflags & 0xFFFF
487 fmt = self._format_version = versionflags & 0xFFFF
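# Sketch of the split above with a concrete header value: a version 1
# inline revlog starts with the 4 bytes 0x00010001, so
#   fmt   == 0x00010001 & 0xFFFF  == 1        (REVLOGV1)
#   flags == 0x00010001 & ~0xFFFF == 0x10000  (FLAG_INLINE_DATA)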
476
488
477 if fmt == REVLOGV0:
489 if fmt == REVLOGV0:
478 if flags:
490 if flags:
479 raise error.RevlogError(
491 raise error.RevlogError(
480 _(b'unknown flags (%#04x) in version %d revlog %s')
492 _(b'unknown flags (%#04x) in version %d revlog %s')
481 % (flags >> 16, fmt, self.indexfile)
493 % (flags >> 16, fmt, self.indexfile)
482 )
494 )
483
495
484 self._inline = False
496 self._inline = False
485 self._generaldelta = False
497 self._generaldelta = False
486
498
487 elif fmt == REVLOGV1:
499 elif fmt == REVLOGV1:
488 if flags & ~REVLOGV1_FLAGS:
500 if flags & ~REVLOGV1_FLAGS:
489 raise error.RevlogError(
501 raise error.RevlogError(
490 _(b'unknown flags (%#04x) in version %d revlog %s')
502 _(b'unknown flags (%#04x) in version %d revlog %s')
491 % (flags >> 16, fmt, self.indexfile)
503 % (flags >> 16, fmt, self.indexfile)
492 )
504 )
493
505
494 self._inline = versionflags & FLAG_INLINE_DATA
506 self._inline = versionflags & FLAG_INLINE_DATA
495 self._generaldelta = versionflags & FLAG_GENERALDELTA
507 self._generaldelta = versionflags & FLAG_GENERALDELTA
496
508
497 elif fmt == REVLOGV2:
509 elif fmt == REVLOGV2:
498 if flags & ~REVLOGV2_FLAGS:
510 if flags & ~REVLOGV2_FLAGS:
499 raise error.RevlogError(
511 raise error.RevlogError(
500 _(b'unknown flags (%#04x) in version %d revlog %s')
512 _(b'unknown flags (%#04x) in version %d revlog %s')
501 % (flags >> 16, fmt, self.indexfile)
513 % (flags >> 16, fmt, self.indexfile)
502 )
514 )
503
515
504 # There is a bug in the transaction handling when going from an
516 # There is a bug in the transaction handling when going from an
505 # inline revlog to a separate index and data file. Turn it off until
517 # inline revlog to a separate index and data file. Turn it off until
506 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
518 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
507 # See issue6485
519 # See issue6485
508 self._inline = False
520 self._inline = False
509 # generaldelta implied by version 2 revlogs.
521 # generaldelta implied by version 2 revlogs.
510 self._generaldelta = True
522 self._generaldelta = True
511
523
512 else:
524 else:
513 raise error.RevlogError(
525 raise error.RevlogError(
514 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
526 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
515 )
527 )
516
528
517 self.nodeconstants = sha1nodeconstants
529 self.nodeconstants = sha1nodeconstants
518 self.nullid = self.nodeconstants.nullid
530 self.nullid = self.nodeconstants.nullid
519
531
520 # sparse-revlog can't be on without general-delta (issue6056)
532 # sparse-revlog can't be on without general-delta (issue6056)
521 if not self._generaldelta:
533 if not self._generaldelta:
522 self._sparserevlog = False
534 self._sparserevlog = False
523
535
524 self._storedeltachains = True
536 self._storedeltachains = True
525
537
526 devel_nodemap = (
538 devel_nodemap = (
527 self.nodemap_file
539 self.nodemap_file
528 and force_nodemap
540 and force_nodemap
529 and parse_index_v1_nodemap is not None
541 and parse_index_v1_nodemap is not None
530 )
542 )
531
543
532 use_rust_index = False
544 use_rust_index = False
533 if rustrevlog is not None:
545 if rustrevlog is not None:
534 if self.nodemap_file is not None:
546 if self.nodemap_file is not None:
535 use_rust_index = True
547 use_rust_index = True
536 else:
548 else:
537 use_rust_index = self.opener.options.get(b'rust.index')
549 use_rust_index = self.opener.options.get(b'rust.index')
538
550
539 self._parse_index = parse_index_v1
551 self._parse_index = parse_index_v1
540 if self._format_version == REVLOGV0:
552 if self._format_version == REVLOGV0:
541 self._parse_index = revlogv0.parse_index_v0
553 self._parse_index = revlogv0.parse_index_v0
542 elif fmt == REVLOGV2:
554 elif fmt == REVLOGV2:
543 self._parse_index = parse_index_v2
555 self._parse_index = parse_index_v2
544 elif devel_nodemap:
556 elif devel_nodemap:
545 self._parse_index = parse_index_v1_nodemap
557 self._parse_index = parse_index_v1_nodemap
546 elif use_rust_index:
558 elif use_rust_index:
547 self._parse_index = parse_index_v1_mixed
559 self._parse_index = parse_index_v1_mixed
548 try:
560 try:
549 d = self._parse_index(indexdata, self._inline)
561 d = self._parse_index(indexdata, self._inline)
550 index, _chunkcache = d
562 index, _chunkcache = d
551 use_nodemap = (
563 use_nodemap = (
552 not self._inline
564 not self._inline
553 and self.nodemap_file is not None
565 and self.nodemap_file is not None
554 and util.safehasattr(index, 'update_nodemap_data')
566 and util.safehasattr(index, 'update_nodemap_data')
555 )
567 )
556 if use_nodemap:
568 if use_nodemap:
557 nodemap_data = nodemaputil.persisted_data(self)
569 nodemap_data = nodemaputil.persisted_data(self)
558 if nodemap_data is not None:
570 if nodemap_data is not None:
559 docket = nodemap_data[0]
571 docket = nodemap_data[0]
560 if (
572 if (
561 len(d[0]) > docket.tip_rev
573 len(d[0]) > docket.tip_rev
562 and d[0][docket.tip_rev][7] == docket.tip_node
574 and d[0][docket.tip_rev][7] == docket.tip_node
563 ):
575 ):
564 # no changelog tampering
576 # no changelog tampering
565 self._nodemap_docket = docket
577 self._nodemap_docket = docket
566 index.update_nodemap_data(*nodemap_data)
578 index.update_nodemap_data(*nodemap_data)
567 except (ValueError, IndexError):
579 except (ValueError, IndexError):
568 raise error.RevlogError(
580 raise error.RevlogError(
569 _(b"index %s is corrupted") % self.indexfile
581 _(b"index %s is corrupted") % self.indexfile
570 )
582 )
571 self.index, self._chunkcache = d
583 self.index, self._chunkcache = d
572 if not self._chunkcache:
584 if not self._chunkcache:
573 self._chunkclear()
585 self._chunkclear()
574 # revnum -> (chain-length, sum-delta-length)
586 # revnum -> (chain-length, sum-delta-length)
575 self._chaininfocache = util.lrucachedict(500)
587 self._chaininfocache = util.lrucachedict(500)
576 # revlog header -> revlog compressor
588 # revlog header -> revlog compressor
577 self._decompressors = {}
589 self._decompressors = {}
578
590
579 @util.propertycache
591 @util.propertycache
580 def revlog_kind(self):
592 def revlog_kind(self):
581 return self.target[0]
593 return self.target[0]
582
594
583 @util.propertycache
595 @util.propertycache
584 def _compressor(self):
596 def _compressor(self):
585 engine = util.compengines[self._compengine]
597 engine = util.compengines[self._compengine]
586 return engine.revlogcompressor(self._compengineopts)
598 return engine.revlogcompressor(self._compengineopts)
587
599
588 def _indexfp(self, mode=b'r'):
600 def _indexfp(self, mode=b'r'):
589 """file object for the revlog's index file"""
601 """file object for the revlog's index file"""
590 args = {'mode': mode}
602 args = {'mode': mode}
591 if mode != b'r':
603 if mode != b'r':
592 args['checkambig'] = self._checkambig
604 args['checkambig'] = self._checkambig
593 if mode == b'w':
605 if mode == b'w':
594 args['atomictemp'] = True
606 args['atomictemp'] = True
595 return self.opener(self.indexfile, **args)
607 return self.opener(self.indexfile, **args)
596
608
597 def _datafp(self, mode=b'r'):
609 def _datafp(self, mode=b'r'):
598 """file object for the revlog's data file"""
610 """file object for the revlog's data file"""
599 return self.opener(self.datafile, mode=mode)
611 return self.opener(self.datafile, mode=mode)
600
612
601 @contextlib.contextmanager
613 @contextlib.contextmanager
602 def _datareadfp(self, existingfp=None):
614 def _datareadfp(self, existingfp=None):
603 """file object suitable to read data"""
615 """file object suitable to read data"""
604 # Use explicit file handle, if given.
616 # Use explicit file handle, if given.
605 if existingfp is not None:
617 if existingfp is not None:
606 yield existingfp
618 yield existingfp
607
619
608 # Use a file handle being actively used for writes, if available.
620 # Use a file handle being actively used for writes, if available.
609 # There is some danger to doing this because reads will seek the
621 # There is some danger to doing this because reads will seek the
610 # file. However, _writeentry() performs a SEEK_END before all writes,
622 # file. However, _writeentry() performs a SEEK_END before all writes,
611 # so we should be safe.
623 # so we should be safe.
612 elif self._writinghandles:
624 elif self._writinghandles:
613 if self._inline:
625 if self._inline:
614 yield self._writinghandles[0]
626 yield self._writinghandles[0]
615 else:
627 else:
616 yield self._writinghandles[1]
628 yield self._writinghandles[1]
617
629
618 # Otherwise open a new file handle.
630 # Otherwise open a new file handle.
619 else:
631 else:
620 if self._inline:
632 if self._inline:
621 func = self._indexfp
633 func = self._indexfp
622 else:
634 else:
623 func = self._datafp
635 func = self._datafp
624 with func() as fp:
636 with func() as fp:
625 yield fp
637 yield fp
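# Hypothetical usage sketch: reading a raw segment through the context
# manager, so handle reuse stays transparent to the caller:
#   with self._datareadfp() as df:
#       df.seek(offset)
#       data = df.read(length)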
626
638
627 def tiprev(self):
639 def tiprev(self):
628 return len(self.index) - 1
640 return len(self.index) - 1
629
641
630 def tip(self):
642 def tip(self):
631 return self.node(self.tiprev())
643 return self.node(self.tiprev())
632
644
633 def __contains__(self, rev):
645 def __contains__(self, rev):
634 return 0 <= rev < len(self)
646 return 0 <= rev < len(self)
635
647
636 def __len__(self):
648 def __len__(self):
637 return len(self.index)
649 return len(self.index)
638
650
639 def __iter__(self):
651 def __iter__(self):
640 return iter(pycompat.xrange(len(self)))
652 return iter(pycompat.xrange(len(self)))
641
653
642 def revs(self, start=0, stop=None):
654 def revs(self, start=0, stop=None):
643 """iterate over all rev in this revlog (from start to stop)"""
655 """iterate over all rev in this revlog (from start to stop)"""
644 return storageutil.iterrevs(len(self), start=start, stop=stop)
656 return storageutil.iterrevs(len(self), start=start, stop=stop)
645
657
646 @property
658 @property
647 def nodemap(self):
659 def nodemap(self):
648 msg = (
660 msg = (
649 b"revlog.nodemap is deprecated, "
661 b"revlog.nodemap is deprecated, "
650 b"use revlog.index.[has_node|rev|get_rev]"
662 b"use revlog.index.[has_node|rev|get_rev]"
651 )
663 )
652 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
664 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
653 return self.index.nodemap
665 return self.index.nodemap
654
666
655 @property
667 @property
656 def _nodecache(self):
668 def _nodecache(self):
657 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
669 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
658 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
670 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
659 return self.index.nodemap
671 return self.index.nodemap
660
672
661 def hasnode(self, node):
673 def hasnode(self, node):
662 try:
674 try:
663 self.rev(node)
675 self.rev(node)
664 return True
676 return True
665 except KeyError:
677 except KeyError:
666 return False
678 return False
667
679
668 def candelta(self, baserev, rev):
680 def candelta(self, baserev, rev):
669 """whether two revisions (baserev, rev) can be delta-ed or not"""
681 """whether two revisions (baserev, rev) can be delta-ed or not"""
670 # Disable delta if either rev requires a content-changing flag
682 # Disable delta if either rev requires a content-changing flag
671 # processor (ex. LFS). This is because such a flag processor can alter
683 # processor (ex. LFS). This is because such a flag processor can alter
672 # the rawtext content that the delta will be based on, and two clients
684 # the rawtext content that the delta will be based on, and two clients
673 # could have the same revlog node with different flags (i.e. different
685 # could have the same revlog node with different flags (i.e. different
674 # rawtext contents) and the delta could be incompatible.
686 # rawtext contents) and the delta could be incompatible.
675 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
687 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
676 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
688 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
677 ):
689 ):
678 return False
690 return False
679 return True
691 return True
680
692
681 def update_caches(self, transaction):
693 def update_caches(self, transaction):
682 if self.nodemap_file is not None:
694 if self.nodemap_file is not None:
683 if transaction is None:
695 if transaction is None:
684 nodemaputil.update_persistent_nodemap(self)
696 nodemaputil.update_persistent_nodemap(self)
685 else:
697 else:
686 nodemaputil.setup_persistent_nodemap(transaction, self)
698 nodemaputil.setup_persistent_nodemap(transaction, self)
687
699
688 def clearcaches(self):
700 def clearcaches(self):
689 self._revisioncache = None
701 self._revisioncache = None
690 self._chainbasecache.clear()
702 self._chainbasecache.clear()
691 self._chunkcache = (0, b'')
703 self._chunkcache = (0, b'')
692 self._pcache = {}
704 self._pcache = {}
693 self._nodemap_docket = None
705 self._nodemap_docket = None
694 self.index.clearcaches()
706 self.index.clearcaches()
695 # The python code is the one responsible for validating the docket, so we
707 # The python code is the one responsible for validating the docket, so we
696 # end up having to refresh it here.
708 # end up having to refresh it here.
697 use_nodemap = (
709 use_nodemap = (
698 not self._inline
710 not self._inline
699 and self.nodemap_file is not None
711 and self.nodemap_file is not None
700 and util.safehasattr(self.index, 'update_nodemap_data')
712 and util.safehasattr(self.index, 'update_nodemap_data')
701 )
713 )
702 if use_nodemap:
714 if use_nodemap:
703 nodemap_data = nodemaputil.persisted_data(self)
715 nodemap_data = nodemaputil.persisted_data(self)
704 if nodemap_data is not None:
716 if nodemap_data is not None:
705 self._nodemap_docket = nodemap_data[0]
717 self._nodemap_docket = nodemap_data[0]
706 self.index.update_nodemap_data(*nodemap_data)
718 self.index.update_nodemap_data(*nodemap_data)
707
719
708 def rev(self, node):
720 def rev(self, node):
709 try:
721 try:
710 return self.index.rev(node)
722 return self.index.rev(node)
711 except TypeError:
723 except TypeError:
712 raise
724 raise
713 except error.RevlogError:
725 except error.RevlogError:
714 # parsers.c radix tree lookup failed
726 # parsers.c radix tree lookup failed
715 if (
727 if (
716 node == self.nodeconstants.wdirid
728 node == self.nodeconstants.wdirid
717 or node in self.nodeconstants.wdirfilenodeids
729 or node in self.nodeconstants.wdirfilenodeids
718 ):
730 ):
719 raise error.WdirUnsupported
731 raise error.WdirUnsupported
720 raise error.LookupError(node, self.indexfile, _(b'no node'))
732 raise error.LookupError(node, self.indexfile, _(b'no node'))
721
733
722 # Accessors for index entries.
734 # Accessors for index entries.
723
735
724 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
736 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
725 # are flags.
737 # are flags.
726 def start(self, rev):
738 def start(self, rev):
727 return int(self.index[rev][0] >> 16)
739 return int(self.index[rev][0] >> 16)
728
740
729 def flags(self, rev):
741 def flags(self, rev):
730 return self.index[rev][0] & 0xFFFF
742 return self.index[rev][0] & 0xFFFF
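# Sketch of the packing described above: offset and flags share one
# integer, offset in the upper bits and flags in the low 16, so with a
# hypothetical offset = 1024 and flags = 0:
#   packed = (1024 << 16) | 0
#   start(rev) == packed >> 16 == 1024
#   flags(rev) == packed & 0xFFFF == 0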
731
743
732 def length(self, rev):
744 def length(self, rev):
733 return self.index[rev][1]
745 return self.index[rev][1]
734
746
735 def sidedata_length(self, rev):
747 def sidedata_length(self, rev):
736 if not self.hassidedata:
748 if not self.hassidedata:
737 return 0
749 return 0
738 return self.index[rev][9]
750 return self.index[rev][9]
739
751
740 def rawsize(self, rev):
752 def rawsize(self, rev):
741 """return the length of the uncompressed text for a given revision"""
753 """return the length of the uncompressed text for a given revision"""
742 l = self.index[rev][2]
754 l = self.index[rev][2]
743 if l >= 0:
755 if l >= 0:
744 return l
756 return l
745
757
746 t = self.rawdata(rev)
758 t = self.rawdata(rev)
747 return len(t)
759 return len(t)
748
760
749 def size(self, rev):
761 def size(self, rev):
750 """length of non-raw text (processed by a "read" flag processor)"""
762 """length of non-raw text (processed by a "read" flag processor)"""
751 # fast path: if no "read" flag processor could change the content,
763 # fast path: if no "read" flag processor could change the content,
752 # size is rawsize. note: ELLIPSIS is known to not change the content.
764 # size is rawsize. note: ELLIPSIS is known to not change the content.
753 flags = self.flags(rev)
765 flags = self.flags(rev)
754 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
766 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
755 return self.rawsize(rev)
767 return self.rawsize(rev)
756
768
757 return len(self.revision(rev, raw=False))
769 return len(self.revision(rev, raw=False))
758
770
759 def chainbase(self, rev):
771 def chainbase(self, rev):
760 base = self._chainbasecache.get(rev)
772 base = self._chainbasecache.get(rev)
761 if base is not None:
773 if base is not None:
762 return base
774 return base
763
775
764 index = self.index
776 index = self.index
765 iterrev = rev
777 iterrev = rev
766 base = index[iterrev][3]
778 base = index[iterrev][3]
767 while base != iterrev:
779 while base != iterrev:
768 iterrev = base
780 iterrev = base
769 base = index[iterrev][3]
781 base = index[iterrev][3]
770
782
771 self._chainbasecache[rev] = base
783 self._chainbasecache[rev] = base
772 return base
784 return base
773
785
774 def linkrev(self, rev):
786 def linkrev(self, rev):
775 return self.index[rev][4]
787 return self.index[rev][4]
776
788
777 def parentrevs(self, rev):
789 def parentrevs(self, rev):
778 try:
790 try:
779 entry = self.index[rev]
791 entry = self.index[rev]
780 except IndexError:
792 except IndexError:
781 if rev == wdirrev:
793 if rev == wdirrev:
782 raise error.WdirUnsupported
794 raise error.WdirUnsupported
783 raise
795 raise
784 if entry[5] == nullrev:
796 if entry[5] == nullrev:
785 return entry[6], entry[5]
797 return entry[6], entry[5]
786 else:
798 else:
787 return entry[5], entry[6]
799 return entry[5], entry[6]
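# The swap above normalizes (nullrev, p) so that a revision with a
# single parent always reports it first: an entry with parents
# (-1, 5) comes back as (5, -1), while a root's (-1, -1) is unchanged.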
788
800
789 # fast parentrevs(rev) where rev isn't filtered
801 # fast parentrevs(rev) where rev isn't filtered
790 _uncheckedparentrevs = parentrevs
802 _uncheckedparentrevs = parentrevs
791
803
792 def node(self, rev):
804 def node(self, rev):
793 try:
805 try:
794 return self.index[rev][7]
806 return self.index[rev][7]
795 except IndexError:
807 except IndexError:
796 if rev == wdirrev:
808 if rev == wdirrev:
797 raise error.WdirUnsupported
809 raise error.WdirUnsupported
798 raise
810 raise
799
811
800 # Derived from index values.
812 # Derived from index values.
801
813
802 def end(self, rev):
814 def end(self, rev):
803 return self.start(rev) + self.length(rev)
815 return self.start(rev) + self.length(rev)
804
816
805 def parents(self, node):
817 def parents(self, node):
806 i = self.index
818 i = self.index
807 d = i[self.rev(node)]
819 d = i[self.rev(node)]
808 # inline node() to avoid function call overhead
820 # inline node() to avoid function call overhead
809 if d[5] == nullrev:
821 if d[5] == nullrev:
810 return i[d[6]][7], i[d[5]][7]
822 return i[d[6]][7], i[d[5]][7]
811 else:
823 else:
812 return i[d[5]][7], i[d[6]][7]
824 return i[d[5]][7], i[d[6]][7]
813
825
814 def chainlen(self, rev):
826 def chainlen(self, rev):
815 return self._chaininfo(rev)[0]
827 return self._chaininfo(rev)[0]
816
828
817 def _chaininfo(self, rev):
829 def _chaininfo(self, rev):
818 chaininfocache = self._chaininfocache
830 chaininfocache = self._chaininfocache
819 if rev in chaininfocache:
831 if rev in chaininfocache:
820 return chaininfocache[rev]
832 return chaininfocache[rev]
821 index = self.index
833 index = self.index
822 generaldelta = self._generaldelta
834 generaldelta = self._generaldelta
823 iterrev = rev
835 iterrev = rev
824 e = index[iterrev]
836 e = index[iterrev]
825 clen = 0
837 clen = 0
826 compresseddeltalen = 0
838 compresseddeltalen = 0
827 while iterrev != e[3]:
839 while iterrev != e[3]:
828 clen += 1
840 clen += 1
829 compresseddeltalen += e[1]
841 compresseddeltalen += e[1]
830 if generaldelta:
842 if generaldelta:
831 iterrev = e[3]
843 iterrev = e[3]
832 else:
844 else:
833 iterrev -= 1
845 iterrev -= 1
834 if iterrev in chaininfocache:
846 if iterrev in chaininfocache:
835 t = chaininfocache[iterrev]
847 t = chaininfocache[iterrev]
836 clen += t[0]
848 clen += t[0]
837 compresseddeltalen += t[1]
849 compresseddeltalen += t[1]
838 break
850 break
839 e = index[iterrev]
851 e = index[iterrev]
840 else:
852 else:
841 # Add text length of base since decompressing that also takes
853 # Add text length of base since decompressing that also takes
842 # work. For cache hits the length is already included.
854 # work. For cache hits the length is already included.
843 compresseddeltalen += e[1]
855 compresseddeltalen += e[1]
844 r = (clen, compresseddeltalen)
856 r = (clen, compresseddeltalen)
845 chaininfocache[rev] = r
857 chaininfocache[rev] = r
846 return r
858 return r
847
859
848 def _deltachain(self, rev, stoprev=None):
860 def _deltachain(self, rev, stoprev=None):
849 """Obtain the delta chain for a revision.
861 """Obtain the delta chain for a revision.
850
862
851 ``stoprev`` specifies a revision to stop at. If not specified, we
863 ``stoprev`` specifies a revision to stop at. If not specified, we
852 stop at the base of the chain.
864 stop at the base of the chain.
853
865
854 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
866 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
855 revs in ascending order and ``stopped`` is a bool indicating whether
867 revs in ascending order and ``stopped`` is a bool indicating whether
856 ``stoprev`` was hit.
868 ``stoprev`` was hit.
857 """
869 """
858 # Try C implementation.
870 # Try C implementation.
859 try:
871 try:
860 return self.index.deltachain(rev, stoprev, self._generaldelta)
872 return self.index.deltachain(rev, stoprev, self._generaldelta)
861 except AttributeError:
873 except AttributeError:
862 pass
874 pass
863
875
864 chain = []
876 chain = []
865
877
866 # Alias to prevent attribute lookup in tight loop.
878 # Alias to prevent attribute lookup in tight loop.
867 index = self.index
879 index = self.index
868 generaldelta = self._generaldelta
880 generaldelta = self._generaldelta
869
881
870 iterrev = rev
882 iterrev = rev
871 e = index[iterrev]
883 e = index[iterrev]
872 while iterrev != e[3] and iterrev != stoprev:
884 while iterrev != e[3] and iterrev != stoprev:
873 chain.append(iterrev)
885 chain.append(iterrev)
874 if generaldelta:
886 if generaldelta:
875 iterrev = e[3]
887 iterrev = e[3]
876 else:
888 else:
877 iterrev -= 1
889 iterrev -= 1
878 e = index[iterrev]
890 e = index[iterrev]
879
891
880 if iterrev == stoprev:
892 if iterrev == stoprev:
881 stopped = True
893 stopped = True
882 else:
894 else:
883 chain.append(iterrev)
895 chain.append(iterrev)
884 stopped = False
896 stopped = False
885
897
886 chain.reverse()
898 chain.reverse()
887 return chain, stopped
899 return chain, stopped
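# Hypothetical usage of the pure-Python fallback above, for a delta
# chain 2 -> 5 -> 9 (base first) stored with generaldelta:
#   rl._deltachain(9)            == ([2, 5, 9], False)
#   rl._deltachain(9, stoprev=5) == ([9], True)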
888
900
889 def ancestors(self, revs, stoprev=0, inclusive=False):
901 def ancestors(self, revs, stoprev=0, inclusive=False):
890 """Generate the ancestors of 'revs' in reverse revision order.
902 """Generate the ancestors of 'revs' in reverse revision order.
891 Does not generate revs lower than stoprev.
903 Does not generate revs lower than stoprev.
892
904
893 See the documentation for ancestor.lazyancestors for more details."""
905 See the documentation for ancestor.lazyancestors for more details."""
894
906
895 # first, make sure start revisions aren't filtered
907 # first, make sure start revisions aren't filtered
896 revs = list(revs)
908 revs = list(revs)
897 checkrev = self.node
909 checkrev = self.node
898 for r in revs:
910 for r in revs:
899 checkrev(r)
911 checkrev(r)
900 # and we're sure ancestors aren't filtered as well
912 # and we're sure ancestors aren't filtered as well
901
913
902 if rustancestor is not None:
914 if rustancestor is not None:
903 lazyancestors = rustancestor.LazyAncestors
915 lazyancestors = rustancestor.LazyAncestors
904 arg = self.index
916 arg = self.index
905 else:
917 else:
906 lazyancestors = ancestor.lazyancestors
918 lazyancestors = ancestor.lazyancestors
907 arg = self._uncheckedparentrevs
919 arg = self._uncheckedparentrevs
908 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
920 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
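# Hypothetical usage: lazily walk the ancestors of rev 10 in reverse
# revision order, not going below rev 2, including rev 10 itself:
#   for r in rl.ancestors([10], stoprev=2, inclusive=True):
#       ...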
909
921
910 def descendants(self, revs):
922 def descendants(self, revs):
911 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
923 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
912
924
913 def findcommonmissing(self, common=None, heads=None):
925 def findcommonmissing(self, common=None, heads=None):
914 """Return a tuple of the ancestors of common and the ancestors of heads
926 """Return a tuple of the ancestors of common and the ancestors of heads
915 that are not ancestors of common. In revset terminology, we return the
927 that are not ancestors of common. In revset terminology, we return the
916 tuple:
928 tuple:
917
929
918 ::common, (::heads) - (::common)
930 ::common, (::heads) - (::common)
919
931
920 The list is sorted by revision number, meaning it is
932 The list is sorted by revision number, meaning it is
921 topologically sorted.
933 topologically sorted.
922
934
923 'heads' and 'common' are both lists of node IDs. If heads is
935 'heads' and 'common' are both lists of node IDs. If heads is
924 not supplied, uses all of the revlog's heads. If common is not
936 not supplied, uses all of the revlog's heads. If common is not
925 supplied, uses nullid."""
937 supplied, uses nullid."""
926 if common is None:
938 if common is None:
927 common = [self.nullid]
939 common = [self.nullid]
928 if heads is None:
940 if heads is None:
929 heads = self.heads()
941 heads = self.heads()
930
942
931 common = [self.rev(n) for n in common]
943 common = [self.rev(n) for n in common]
932 heads = [self.rev(n) for n in heads]
944 heads = [self.rev(n) for n in heads]
933
945
934 # we want the ancestors, but inclusive
946 # we want the ancestors, but inclusive
935 class lazyset(object):
947 class lazyset(object):
936 def __init__(self, lazyvalues):
948 def __init__(self, lazyvalues):
937 self.addedvalues = set()
949 self.addedvalues = set()
938 self.lazyvalues = lazyvalues
950 self.lazyvalues = lazyvalues
939
951
940 def __contains__(self, value):
952 def __contains__(self, value):
941 return value in self.addedvalues or value in self.lazyvalues
953 return value in self.addedvalues or value in self.lazyvalues
942
954
943 def __iter__(self):
955 def __iter__(self):
944 added = self.addedvalues
956 added = self.addedvalues
945 for r in added:
957 for r in added:
946 yield r
958 yield r
947 for r in self.lazyvalues:
959 for r in self.lazyvalues:
948 if r not in added:
960 if r not in added:
949 yield r
961 yield r
950
962
951 def add(self, value):
963 def add(self, value):
952 self.addedvalues.add(value)
964 self.addedvalues.add(value)
953
965
954 def update(self, values):
966 def update(self, values):
955 self.addedvalues.update(values)
967 self.addedvalues.update(values)
956
968
957 has = lazyset(self.ancestors(common))
969 has = lazyset(self.ancestors(common))
958 has.add(nullrev)
970 has.add(nullrev)
959 has.update(common)
971 has.update(common)
960
972
961 # take all ancestors from heads that aren't in has
973 # take all ancestors from heads that aren't in has
962 missing = set()
974 missing = set()
963 visit = collections.deque(r for r in heads if r not in has)
975 visit = collections.deque(r for r in heads if r not in has)
964 while visit:
976 while visit:
965 r = visit.popleft()
977 r = visit.popleft()
966 if r in missing:
978 if r in missing:
967 continue
979 continue
968 else:
980 else:
969 missing.add(r)
981 missing.add(r)
970 for p in self.parentrevs(r):
982 for p in self.parentrevs(r):
971 if p not in has:
983 if p not in has:
972 visit.append(p)
984 visit.append(p)
973 missing = list(missing)
985 missing = list(missing)
974 missing.sort()
986 missing.sort()
975 return has, [self.node(miss) for miss in missing]
987 return has, [self.node(miss) for miss in missing]
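# In the revset terms of the docstring, a hypothetical call
#   has, missing = rl.findcommonmissing(common=[c], heads=[h])
# leaves `has` as a lazy set covering ::c (plus nullrev), and `missing`
# as the nodes of (::h) - (::c), sorted by revision number.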
976
988
977 def incrementalmissingrevs(self, common=None):
989 def incrementalmissingrevs(self, common=None):
978 """Return an object that can be used to incrementally compute the
990 """Return an object that can be used to incrementally compute the
979 revision numbers of the ancestors of arbitrary sets that are not
991 revision numbers of the ancestors of arbitrary sets that are not
980 ancestors of common. This is an ancestor.incrementalmissingancestors
992 ancestors of common. This is an ancestor.incrementalmissingancestors
981 object.
993 object.
982
994
983 'common' is a list of revision numbers. If common is not supplied, uses
995 'common' is a list of revision numbers. If common is not supplied, uses
984 nullrev.
996 nullrev.
985 """
997 """
986 if common is None:
998 if common is None:
987 common = [nullrev]
999 common = [nullrev]
988
1000
989 if rustancestor is not None:
1001 if rustancestor is not None:
990 return rustancestor.MissingAncestors(self.index, common)
1002 return rustancestor.MissingAncestors(self.index, common)
991 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1003 return ancestor.incrementalmissingancestors(self.parentrevs, common)
992
1004
993 def findmissingrevs(self, common=None, heads=None):
1005 def findmissingrevs(self, common=None, heads=None):
994 """Return the revision numbers of the ancestors of heads that
1006 """Return the revision numbers of the ancestors of heads that
995 are not ancestors of common.
1007 are not ancestors of common.
996
1008
997 More specifically, return a list of revision numbers corresponding to
1009 More specifically, return a list of revision numbers corresponding to
998 nodes N such that every N satisfies the following constraints:
1010 nodes N such that every N satisfies the following constraints:
999
1011
1000 1. N is an ancestor of some node in 'heads'
1012 1. N is an ancestor of some node in 'heads'
1001 2. N is not an ancestor of any node in 'common'
1013 2. N is not an ancestor of any node in 'common'
1002
1014
1003 The list is sorted by revision number, meaning it is
1015 The list is sorted by revision number, meaning it is
1004 topologically sorted.
1016 topologically sorted.
1005
1017
1006 'heads' and 'common' are both lists of revision numbers. If heads is
1018 'heads' and 'common' are both lists of revision numbers. If heads is
1007 not supplied, uses all of the revlog's heads. If common is not
1019 not supplied, uses all of the revlog's heads. If common is not
1008 supplied, uses nullid."""
1020 supplied, uses nullid."""
1009 if common is None:
1021 if common is None:
1010 common = [nullrev]
1022 common = [nullrev]
1011 if heads is None:
1023 if heads is None:
1012 heads = self.headrevs()
1024 heads = self.headrevs()
1013
1025
1014 inc = self.incrementalmissingrevs(common=common)
1026 inc = self.incrementalmissingrevs(common=common)
1015 return inc.missingancestors(heads)
1027 return inc.missingancestors(heads)
1016
1028
1017 def findmissing(self, common=None, heads=None):
1029 def findmissing(self, common=None, heads=None):
1018 """Return the ancestors of heads that are not ancestors of common.
1030 """Return the ancestors of heads that are not ancestors of common.
1019
1031
1020 More specifically, return a list of nodes N such that every N
1032 More specifically, return a list of nodes N such that every N
1021 satisfies the following constraints:
1033 satisfies the following constraints:
1022
1034
1023 1. N is an ancestor of some node in 'heads'
1035 1. N is an ancestor of some node in 'heads'
1024 2. N is not an ancestor of any node in 'common'
1036 2. N is not an ancestor of any node in 'common'
1025
1037
1026 The list is sorted by revision number, meaning it is
1038 The list is sorted by revision number, meaning it is
1027 topologically sorted.
1039 topologically sorted.
1028
1040
1029 'heads' and 'common' are both lists of node IDs. If heads is
1041 'heads' and 'common' are both lists of node IDs. If heads is
1030 not supplied, uses all of the revlog's heads. If common is not
1042 not supplied, uses all of the revlog's heads. If common is not
1031 supplied, uses nullid."""
1043 supplied, uses nullid."""
1032 if common is None:
1044 if common is None:
1033 common = [self.nullid]
1045 common = [self.nullid]
1034 if heads is None:
1046 if heads is None:
1035 heads = self.heads()
1047 heads = self.heads()
1036
1048
1037 common = [self.rev(n) for n in common]
1049 common = [self.rev(n) for n in common]
1038 heads = [self.rev(n) for n in heads]
1050 heads = [self.rev(n) for n in heads]
1039
1051
1040 inc = self.incrementalmissingrevs(common=common)
1052 inc = self.incrementalmissingrevs(common=common)
1041 return [self.node(r) for r in inc.missingancestors(heads)]
1053 return [self.node(r) for r in inc.missingancestors(heads)]
1042
1054
1043 def nodesbetween(self, roots=None, heads=None):
1055 def nodesbetween(self, roots=None, heads=None):
1044 """Return a topological path from 'roots' to 'heads'.
1056 """Return a topological path from 'roots' to 'heads'.
1045
1057
1046 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1058 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1047 topologically sorted list of all nodes N that satisfy both of
1059 topologically sorted list of all nodes N that satisfy both of
1048 these constraints:
1060 these constraints:
1049
1061
1050 1. N is a descendant of some node in 'roots'
1062 1. N is a descendant of some node in 'roots'
1051 2. N is an ancestor of some node in 'heads'
1063 2. N is an ancestor of some node in 'heads'
1052
1064
1053 Every node is considered to be both a descendant and an ancestor
1065 Every node is considered to be both a descendant and an ancestor
1054 of itself, so every reachable node in 'roots' and 'heads' will be
1066 of itself, so every reachable node in 'roots' and 'heads' will be
1055 included in 'nodes'.
1067 included in 'nodes'.
1056
1068
1057 'outroots' is the list of reachable nodes in 'roots', i.e., the
1069 'outroots' is the list of reachable nodes in 'roots', i.e., the
1058 subset of 'roots' that is returned in 'nodes'. Likewise,
1070 subset of 'roots' that is returned in 'nodes'. Likewise,
1059 'outheads' is the subset of 'heads' that is also in 'nodes'.
1071 'outheads' is the subset of 'heads' that is also in 'nodes'.
1060
1072
1061 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1073 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1062 unspecified, uses nullid as the only root. If 'heads' is
1074 unspecified, uses nullid as the only root. If 'heads' is
1063 unspecified, uses list of all of the revlog's heads."""
1075 unspecified, uses list of all of the revlog's heads."""
1064 nonodes = ([], [], [])
1076 nonodes = ([], [], [])
1065 if roots is not None:
1077 if roots is not None:
1066 roots = list(roots)
1078 roots = list(roots)
1067 if not roots:
1079 if not roots:
1068 return nonodes
1080 return nonodes
1069 lowestrev = min([self.rev(n) for n in roots])
1081 lowestrev = min([self.rev(n) for n in roots])
1070 else:
1082 else:
1071 roots = [self.nullid] # Everybody's a descendant of nullid
1083 roots = [self.nullid] # Everybody's a descendant of nullid
1072 lowestrev = nullrev
1084 lowestrev = nullrev
1073 if (lowestrev == nullrev) and (heads is None):
1085 if (lowestrev == nullrev) and (heads is None):
1074 # We want _all_ the nodes!
1086 # We want _all_ the nodes!
1075 return (
1087 return (
1076 [self.node(r) for r in self],
1088 [self.node(r) for r in self],
1077 [self.nullid],
1089 [self.nullid],
1078 list(self.heads()),
1090 list(self.heads()),
1079 )
1091 )
1080 if heads is None:
1092 if heads is None:
1081 # All nodes are ancestors, so the latest ancestor is the last
1093 # All nodes are ancestors, so the latest ancestor is the last
1082 # node.
1094 # node.
1083 highestrev = len(self) - 1
1095 highestrev = len(self) - 1
1084 # Set ancestors to None to signal that every node is an ancestor.
1096 # Set ancestors to None to signal that every node is an ancestor.
1085 ancestors = None
1097 ancestors = None
1086 # Set heads to an empty dictionary for later discovery of heads
1098 # Set heads to an empty dictionary for later discovery of heads
1087 heads = {}
1099 heads = {}
1088 else:
1100 else:
1089 heads = list(heads)
1101 heads = list(heads)
1090 if not heads:
1102 if not heads:
1091 return nonodes
1103 return nonodes
1092 ancestors = set()
1104 ancestors = set()
1093 # Turn heads into a dictionary so we can remove 'fake' heads.
1105 # Turn heads into a dictionary so we can remove 'fake' heads.
1094 # Also, later we will be using it to filter out the heads we can't
1106 # Also, later we will be using it to filter out the heads we can't
1095 # find from roots.
1107 # find from roots.
1096 heads = dict.fromkeys(heads, False)
1108 heads = dict.fromkeys(heads, False)
1097 # Start at the top and keep marking parents until we're done.
1109 # Start at the top and keep marking parents until we're done.
1098 nodestotag = set(heads)
1110 nodestotag = set(heads)
1099 # Remember where the top was so we can use it as a limit later.
1111 # Remember where the top was so we can use it as a limit later.
1100 highestrev = max([self.rev(n) for n in nodestotag])
1112 highestrev = max([self.rev(n) for n in nodestotag])
1101 while nodestotag:
1113 while nodestotag:
1102 # grab a node to tag
1114 # grab a node to tag
1103 n = nodestotag.pop()
1115 n = nodestotag.pop()
1104 # Never tag nullid
1116 # Never tag nullid
1105 if n == self.nullid:
1117 if n == self.nullid:
1106 continue
1118 continue
1107 # A node's revision number represents its place in a
1119 # A node's revision number represents its place in a
1108 # topologically sorted list of nodes.
1120 # topologically sorted list of nodes.
1109 r = self.rev(n)
1121 r = self.rev(n)
1110 if r >= lowestrev:
1122 if r >= lowestrev:
1111 if n not in ancestors:
1123 if n not in ancestors:
1112 # If we are possibly a descendant of one of the roots
1124 # If we are possibly a descendant of one of the roots
1113 # and we haven't already been marked as an ancestor
1125 # and we haven't already been marked as an ancestor
1114 ancestors.add(n) # Mark as ancestor
1126 ancestors.add(n) # Mark as ancestor
1115 # Add non-nullid parents to list of nodes to tag.
1127 # Add non-nullid parents to list of nodes to tag.
1116 nodestotag.update(
1128 nodestotag.update(
1117 [p for p in self.parents(n) if p != self.nullid]
1129 [p for p in self.parents(n) if p != self.nullid]
1118 )
1130 )
1119 elif n in heads: # We've seen it before, is it a fake head?
1131 elif n in heads: # We've seen it before, is it a fake head?
1120 # So it is, real heads should not be the ancestors of
1132 # So it is, real heads should not be the ancestors of
1121 # any other heads.
1133 # any other heads.
1122 heads.pop(n)
1134 heads.pop(n)
1123 if not ancestors:
1135 if not ancestors:
1124 return nonodes
1136 return nonodes
1125 # Now that we have our set of ancestors, we want to remove any
1137 # Now that we have our set of ancestors, we want to remove any
1126 # roots that are not ancestors.
1138 # roots that are not ancestors.
1127
1139
1128 # If one of the roots was nullid, everything is included anyway.
1140 # If one of the roots was nullid, everything is included anyway.
1129 if lowestrev > nullrev:
1141 if lowestrev > nullrev:
1130 # But, since we weren't, let's recompute the lowest rev to not
1142 # But, since we weren't, let's recompute the lowest rev to not
1131 # include roots that aren't ancestors.
1143 # include roots that aren't ancestors.
1132
1144
1133 # Filter out roots that aren't ancestors of heads
1145 # Filter out roots that aren't ancestors of heads
1134 roots = [root for root in roots if root in ancestors]
1146 roots = [root for root in roots if root in ancestors]
1135 # Recompute the lowest revision
1147 # Recompute the lowest revision
1136 if roots:
1148 if roots:
1137 lowestrev = min([self.rev(root) for root in roots])
1149 lowestrev = min([self.rev(root) for root in roots])
1138 else:
1150 else:
1139 # No more roots? Return empty list
1151 # No more roots? Return empty list
1140 return nonodes
1152 return nonodes
1141 else:
1153 else:
1142 # We are descending from nullid, and don't need to care about
1154 # We are descending from nullid, and don't need to care about
1143 # any other roots.
1155 # any other roots.
1144 lowestrev = nullrev
1156 lowestrev = nullrev
1145 roots = [self.nullid]
1157 roots = [self.nullid]
1146 # Transform our roots list into a set.
1158 # Transform our roots list into a set.
1147 descendants = set(roots)
1159 descendants = set(roots)
1148 # Also, keep the original roots so we can filter out roots that aren't
1160 # Also, keep the original roots so we can filter out roots that aren't
1149 # 'real' roots (i.e. are descended from other roots).
1161 # 'real' roots (i.e. are descended from other roots).
1150 roots = descendants.copy()
1162 roots = descendants.copy()
1151 # Our topologically sorted list of output nodes.
1163 # Our topologically sorted list of output nodes.
1152 orderedout = []
1164 orderedout = []
1153 # Don't start at nullid since we don't want nullid in our output list,
1165 # Don't start at nullid since we don't want nullid in our output list,
1154 # and if nullid shows up in descendants, empty parents will look like
1166 # and if nullid shows up in descendants, empty parents will look like
1155 # they're descendants.
1167 # they're descendants.
1156 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1168 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1157 n = self.node(r)
1169 n = self.node(r)
1158 isdescendant = False
1170 isdescendant = False
1159 if lowestrev == nullrev: # Everybody is a descendant of nullid
1171 if lowestrev == nullrev: # Everybody is a descendant of nullid
1160 isdescendant = True
1172 isdescendant = True
1161 elif n in descendants:
1173 elif n in descendants:
1162 # n is already a descendant
1174 # n is already a descendant
1163 isdescendant = True
1175 isdescendant = True
1164 # This check only needs to be done here because all the roots
1176 # This check only needs to be done here because all the roots
1165 # will start being marked as descendants before the loop.
1177 # will start being marked as descendants before the loop.
1166 if n in roots:
1178 if n in roots:
1167 # If n was a root, check if it's a 'real' root.
1179 # If n was a root, check if it's a 'real' root.
1168 p = tuple(self.parents(n))
1180 p = tuple(self.parents(n))
1169 # If any of its parents are descendants, it's not a root.
1181 # If any of its parents are descendants, it's not a root.
1170 if (p[0] in descendants) or (p[1] in descendants):
1182 if (p[0] in descendants) or (p[1] in descendants):
1171 roots.remove(n)
1183 roots.remove(n)
1172 else:
1184 else:
1173 p = tuple(self.parents(n))
1185 p = tuple(self.parents(n))
1174 # A node is a descendant if either of its parents is a
1186 # A node is a descendant if either of its parents is a
1175 # descendant. (We seeded the descendants set with the roots
1187 # descendant. (We seeded the descendants set with the roots
1176 # up there, remember?)
1188 # up there, remember?)
1177 if (p[0] in descendants) or (p[1] in descendants):
1189 if (p[0] in descendants) or (p[1] in descendants):
1178 descendants.add(n)
1190 descendants.add(n)
1179 isdescendant = True
1191 isdescendant = True
1180 if isdescendant and ((ancestors is None) or (n in ancestors)):
1192 if isdescendant and ((ancestors is None) or (n in ancestors)):
1181 # Only include nodes that are both descendants and ancestors.
1193 # Only include nodes that are both descendants and ancestors.
1182 orderedout.append(n)
1194 orderedout.append(n)
1183 if (ancestors is not None) and (n in heads):
1195 if (ancestors is not None) and (n in heads):
1184 # We're trying to figure out which heads are reachable
1196 # We're trying to figure out which heads are reachable
1185 # from roots.
1197 # from roots.
1186 # Mark this head as having been reached
1198 # Mark this head as having been reached
1187 heads[n] = True
1199 heads[n] = True
1188 elif ancestors is None:
1200 elif ancestors is None:
1189 # Otherwise, we're trying to discover the heads.
1201 # Otherwise, we're trying to discover the heads.
1190 # Assume this is a head because if it isn't, the next step
1202 # Assume this is a head because if it isn't, the next step
1191 # will eventually remove it.
1203 # will eventually remove it.
1192 heads[n] = True
1204 heads[n] = True
1193 # But, obviously its parents aren't.
1205 # But, obviously its parents aren't.
1194 for p in self.parents(n):
1206 for p in self.parents(n):
1195 heads.pop(p, None)
1207 heads.pop(p, None)
1196 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1208 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1197 roots = list(roots)
1209 roots = list(roots)
1198 assert orderedout
1210 assert orderedout
1199 assert roots
1211 assert roots
1200 assert heads
1212 assert heads
1201 return (orderedout, roots, heads)
1213 return (orderedout, roots, heads)
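# Roughly `roots::heads` in revset terms: a hypothetical
#   nodes, outroots, outheads = rl.nodesbetween([r], [h])
# returns the topologically sorted nodes that both descend from r and
# reach h, plus the subsets of the inputs that actually made it in.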
1202
1214
1203 def headrevs(self, revs=None):
1215 def headrevs(self, revs=None):
1204 if revs is None:
1216 if revs is None:
1205 try:
1217 try:
1206 return self.index.headrevs()
1218 return self.index.headrevs()
1207 except AttributeError:
1219 except AttributeError:
1208 return self._headrevs()
1220 return self._headrevs()
1209 if rustdagop is not None:
1221 if rustdagop is not None:
1210 return rustdagop.headrevs(self.index, revs)
1222 return rustdagop.headrevs(self.index, revs)
1211 return dagop.headrevs(revs, self._uncheckedparentrevs)
1223 return dagop.headrevs(revs, self._uncheckedparentrevs)
1212
1224
1213 def computephases(self, roots):
1225 def computephases(self, roots):
1214 return self.index.computephasesmapsets(roots)
1226 return self.index.computephasesmapsets(roots)
1215
1227
1216 def _headrevs(self):
1228 def _headrevs(self):
1217 count = len(self)
1229 count = len(self)
1218 if not count:
1230 if not count:
1219 return [nullrev]
1231 return [nullrev]
1220 # we won't iterate over filtered revs, so nobody is a head at start
1232 # we won't iterate over filtered revs, so nobody is a head at start
1221 ishead = [0] * (count + 1)
1233 ishead = [0] * (count + 1)
1222 index = self.index
1234 index = self.index
1223 for r in self:
1235 for r in self:
1224 ishead[r] = 1 # I may be a head
1236 ishead[r] = 1 # I may be a head
1225 e = index[r]
1237 e = index[r]
1226 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1238 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1227 return [r for r, val in enumerate(ishead) if val]
1239 return [r for r, val in enumerate(ishead) if val]
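# Worked example of the marking pass above: for a linear history
# 0 <- 1 <- 2, every rev first marks itself and then clears its
# parents, leaving only ishead[2] set, so [2] is returned. The extra
# slot in `ishead` (count + 1) harmlessly absorbs nullrev (-1) writes.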
1228
1240
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

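    # Example of reachableroots semantics (an illustrative sketch, not a
    # doctest): on a linear history 0 <- 1 <- 2,
    # reachableroots(0, heads=[2], roots=[0]) returns [0] because revision 0
    # is an ancestor of revision 2, while includepath=True would return the
    # whole path [0, 1, 2]. This is exactly how isancestorrev() uses it: a
    # non-empty result means `a` is an ancestor of `b`.
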
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

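    # Typical lookup() inputs (an illustrative sketch of the dispatch above,
    # assuming a changelog instance `cl`):
    #
    #   cl.lookup(0)            -> node of revision 0        (_match, int)
    #   cl.lookup(b'0')         -> node of revision 0        (_match, str(rev))
    #   cl.lookup(b'c6b8d5d9')  -> unique node prefix match  (_partialmatch)
    #
    # Anything ambiguous raises AmbiguousPrefixLookupError; anything unknown
    # raises LookupError with b'no match found'.
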
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

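    # For instance (illustrative, not a doctest): shortest() returns the
    # smallest prefix that lookup() would resolve unambiguously, so a node
    # starting with c6b8d5d9... may shorten to b'c6b8' in a small repository
    # while a busy one needs more digits; the `{shortest(node)}` template
    # function builds on this mechanism.
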
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
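        # Worked example of the rounding above (illustrative; assumes the
        # default cachesize of 64 KiB = 65536, which must be a power of two):
        #
        #   offset = 70000, length = 1000
        #   realoffset = 70000 & ~65535           = 65536
        #   reallength = (71000 + 65536) & ~65535 = 131072; minus realoffset
        #                                         = 65536
        #
        # so the read covers the aligned window [65536, 131072), which fully
        # contains the requested [70000, 71000).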
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

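    # Inline layout note (illustrative): in an inline revlog the data chunks
    # are interleaved with the index entries in a single file, so the flat
    # data offset recorded in the index must be shifted by one entry per
    # revision up to and including the one being read. Assuming a 64-byte
    # entry size, the physical start of rev 2's data is:
    #
    #   start(2) + (2 + 1) * 64
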
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # assign the method directly to cache both the attribute test
            # and the lookup
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

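    # Snapshot intuition (illustrative): in a sparse revlog, a "snapshot" is
    # a revision whose delta chain does not run through one of its parents,
    # e.g. a full text (base == nullrev) or an intermediate snapshot whose
    # base is itself a snapshot; a delta against p1 or p2 is an ordinary
    # delta, hence the `base == p1 or base == p2 -> False` check above.
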
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

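    # Reconstruction sketch (illustrative): _rawtext() resolves a delta
    # chain such as [base, d1, d2, rev] by fetching each chunk and folding
    # the deltas onto the base text, roughly:
    #
    #   chain, stopped = self._deltachain(rev)
    #   bins = self._chunks(chain)
    #   rawtext = mdiff.patches(bytes(bins[0]), bins[1:])
    #
    # The `stopped` case reuses the cached text as the base instead of
    # re-reading the full chain from disk.
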
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

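    # Default hashing sketch (illustrative; storageutil.hashrevisionsha1 is
    # the authoritative implementation): the node is a SHA-1 over the two
    # parent nodes in sorted order followed by the text, approximately:
    #
    #   import hashlib
    #   s = hashlib.sha1(min(p1, p2))
    #   s.update(max(p1, p2))
    #   s.update(text)
    #   node = s.digest()
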
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        troffset = tr.findoffset(self.indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )
        trindex = 0
        tr.add(self.datafile, 0)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
                if troffset <= self.start(r):
                    trindex = r

        with self._indexfp(b'w') as fp:
            self._format_flags &= ~FLAG_INLINE_DATA
            self._inline = False
            for i in self:
                e = self.index.entry_binary(i)
                if i == 0:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    e = header + e
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self.index.entry_size)
        nodemaputil.setup_persistent_nodemap(tr, self)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        sidedata - optional extra data to store alongside the revision
        (requires a revlog with sidedata support)
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support it")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

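    # Typical call (illustrative sketch, assuming a revlog instance `rl`,
    # an open transaction `tr`, and parent nodes p1/p2): the default flow
    # hashes the text, dedupes against the index, then delegates to
    # addrawrevision():
    #
    #   rev = rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # The return value is the revision number of the (possibly pre-existing)
    # entry.
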
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

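    # Chunk header convention (recoverable from compress() above and
    # decompress() below): the first byte of a stored chunk routes
    # decompression:
    #
    #   b'\0'  -> data stored raw (a leading NUL needs no extra marker)
    #   b'u'   -> uncompressed data that did not start with NUL
    #   b'x'   -> zlib (the zlib magic byte)
    #   other  -> looked up via util.compengines.forrevlogheader()
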
2120 def decompress(self, data):
2132 def decompress(self, data):
2121 """Decompress a revlog chunk.
2133 """Decompress a revlog chunk.
2122
2134
2123 The chunk is expected to begin with a header identifying the
2135 The chunk is expected to begin with a header identifying the
2124 format type so it can be routed to an appropriate decompressor.
2136 format type so it can be routed to an appropriate decompressor.
2125 """
2137 """
2126 if not data:
2138 if not data:
2127 return data
2139 return data
2128
2140
2129 # Revlogs are read much more frequently than they are written and many
2141 # Revlogs are read much more frequently than they are written and many
2130 # chunks only take microseconds to decompress, so performance is
2142 # chunks only take microseconds to decompress, so performance is
2131 # important here.
2143 # important here.
2132 #
2144 #
2133 # We can make a few assumptions about revlogs:
2145 # We can make a few assumptions about revlogs:
2134 #
2146 #
2135 # 1) the majority of chunks will be compressed (as opposed to inline
2147 # 1) the majority of chunks will be compressed (as opposed to inline
2136 # raw data).
2148 # raw data).
2137 # 2) decompressing *any* data will likely by at least 10x slower than
2149 # 2) decompressing *any* data will likely by at least 10x slower than
2138 # returning raw inline data.
2150 # returning raw inline data.
2139 # 3) we want to prioritize common and officially supported compression
2151 # 3) we want to prioritize common and officially supported compression
2140 # engines
2152 # engines
2141 #
2153 #
2142 # It follows that we want to optimize for "decompress compressed data
2154 # It follows that we want to optimize for "decompress compressed data
2143 # when encoded with common and officially supported compression engines"
2155 # when encoded with common and officially supported compression engines"
2144 # case over "raw data" and "data encoded by less common or non-official
2156 # case over "raw data" and "data encoded by less common or non-official
2145 # compression engines." That is why we have the inline lookup first
2157 # compression engines." That is why we have the inline lookup first
2146 # followed by the compengines lookup.
2158 # followed by the compengines lookup.
2147 #
2159 #
2148 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2160 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2149 # compressed chunks. And this matters for changelog and manifest reads.
2161 # compressed chunks. And this matters for changelog and manifest reads.
2150 t = data[0:1]
2162 t = data[0:1]
2151
2163
2152 if t == b'x':
2164 if t == b'x':
2153 try:
2165 try:
2154 return _zlibdecompress(data)
2166 return _zlibdecompress(data)
2155 except zlib.error as e:
2167 except zlib.error as e:
2156 raise error.RevlogError(
2168 raise error.RevlogError(
2157 _(b'revlog decompress error: %s')
2169 _(b'revlog decompress error: %s')
2158 % stringutil.forcebytestr(e)
2170 % stringutil.forcebytestr(e)
2159 )
2171 )
2160 # '\0' is more common than 'u' so it goes first.
2172 # '\0' is more common than 'u' so it goes first.
2161 elif t == b'\0':
2173 elif t == b'\0':
2162 return data
2174 return data
2163 elif t == b'u':
2175 elif t == b'u':
2164 return util.buffer(data, 1)
2176 return util.buffer(data, 1)
2165
2177
2166 try:
2178 try:
2167 compressor = self._decompressors[t]
2179 compressor = self._decompressors[t]
2168 except KeyError:
2180 except KeyError:
2169 try:
2181 try:
2170 engine = util.compengines.forrevlogheader(t)
2182 engine = util.compengines.forrevlogheader(t)
2171 compressor = engine.revlogcompressor(self._compengineopts)
2183 compressor = engine.revlogcompressor(self._compengineopts)
2172 self._decompressors[t] = compressor
2184 self._decompressors[t] = compressor
2173 except KeyError:
2185 except KeyError:
2174 raise error.RevlogError(
2186 raise error.RevlogError(
2175 _(b'unknown compression type %s') % binascii.hexlify(t)
2187 _(b'unknown compression type %s') % binascii.hexlify(t)
2176 )
2188 )
2177
2189
2178 return compressor.decompress(data)
2190 return compressor.decompress(data)
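
    # A short note on the header bytes handled above, matching the branches of
    # decompress(): b'x' (the usual first byte of a zlib stream) is decoded
    # inline, b'\0' marks a chunk stored verbatim, and b'u' prefixes literal
    # text that did not shrink under compression. Any other first byte is
    # resolved through util.compengines.forrevlogheader(); for instance, the
    # zstd engine (when available) registers its own one-byte header there.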

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self.indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self.indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self.datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )
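        # For reference, the tuple packed above is the sidedata-capable index
        # entry: (offset and flags as packed by offset_type(), stored delta
        # length, full rawtext length, delta base rev, linkrev, p1 rev, p2 rev,
        # node, sidedata offset, sidedata length). Fields 8 and 9 are the ones
        # consulted by _get_data_offset() and rewrite_sidedata() below.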

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            ifh,
            dfh,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2 needs
        a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset
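
    # An illustration of the REVLOGV2 scan above, with made-up numbers: if
    # rev 5 ends at byte 1000 but rev 4's sidedata was later rewritten and now
    # occupies bytes 1000-1100 (entry[8] + entry[9] == 1100), the next write
    # must start at 1100. Taking max(self.end(rev), offset, sidedata_end) over
    # every entry accounts for this, at the cost of an O(len(index)) walk.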

    def _writeentry(
        self, transaction, ifh, dfh, entry, data, link, offset, sidedata
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction, ifh)
        nodemaputil.setup_persistent_nodemap(transaction, self)
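
    # Layout note for the two branches above: a split revlog appends chunk
    # data (and any sidedata) to the .d file and the fixed-size entry to the
    # .i file, while an inline revlog interleaves entry, data and sidedata in
    # the .i file alone, with _enforceinlinesize() converting to the split
    # layout once the file grows past the inline threshold.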

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self.index.entry_size
        if self._inline:
            transaction.add(self.indexfile, end + isize)
            dfh = None
        else:
            transaction.add(self.indexfile, isize)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)
        empty = True

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                rev = self.index.get_rev(node)
                if rev is not None:
                    # this can happen if two branches make the same change
                    self._nodeduplicatecallback(transaction, rev)
                    if duplicaterevisioncb:
                        duplicaterevisioncb(self, rev)
                    empty = False
                    continue

                for p in (p1, p2):
                    if not self.index.has_node(p):
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if not self.index.has_node(deltabase):
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                rev = self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=alwayscache,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

                if addrevisioncb:
                    addrevisioncb(self, rev)
                empty = False

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()
        return not empty
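
    # A sketch of the producer side (values are illustrative): changegroup
    # unpacking yields one 8-tuple per revision,
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    # where `delta` is a binary patch against `deltabase`, and `linkmapper`
    # turns `linknode` into the local changelog rev recorded as the linkrev.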

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
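
    # Hedged usage sketch: verification code (see verifyintegrity() below)
    # treats (0, 0) as healthy and reports a positive dd or di as trailing
    # bytes left behind by an interrupted write, e.g.:
    #     dd, di = rl.checksize()
    #     if dd or di:
    #         ...report the revlog as damaged...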

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. When left unset, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
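
    # A hedged example of driving clone() from upgrade-style code (the
    # surrounding names are illustrative, not part of this module):
    #     with repo.transaction(b'clone-revlog') as tr:
    #         src.clone(tr, dst, deltareuse=revlog.DELTAREUSESAMEREVS)
    # DELTAREUSEALWAYS would favor speed; DELTAREUSENEVER recomputes every
    # delta and is the choice when the delta algorithm itself changed.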

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            postfix=b'tmpcensored',
            indexfile=self.indexfile,
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index
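        # With `postfix` set, the temporary revlog derives its on-disk file
        # names from `indexfile` plus that postfix (hence the renames from
        # newrl.indexfile and newrl.datafile below), rather than the older
        # approach of spelling out the b'.tmpcensored' names by hand.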

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()
2965
2957 def verifyintegrity(self, state):
2966 def verifyintegrity(self, state):
2958 """Verifies the integrity of the revlog.
2967 """Verifies the integrity of the revlog.
2959
2968
2960 Yields ``revlogproblem`` instances describing problems that are
2969 Yields ``revlogproblem`` instances describing problems that are
2961 found.
2970 found.
2962 """
2971 """
2963 dd, di = self.checksize()
2972 dd, di = self.checksize()
2964 if dd:
2973 if dd:
2965 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2974 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2966 if di:
2975 if di:
2967 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2976 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2968
2977
2969 version = self._format_version
2978 version = self._format_version
2970
2979
2971 # The verifier tells us what version revlog we should be.
2980 # The verifier tells us what version revlog we should be.
2972 if version != state[b'expectedversion']:
2981 if version != state[b'expectedversion']:
2973 yield revlogproblem(
2982 yield revlogproblem(
2974 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2983 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2975 % (self.indexfile, version, state[b'expectedversion'])
2984 % (self.indexfile, version, state[b'expectedversion'])
2976 )
2985 )
2977
2986
2978 state[b'skipread'] = set()
2987 state[b'skipread'] = set()
2979 state[b'safe_renamed'] = set()
2988 state[b'safe_renamed'] = set()
2980
2989
2981 for rev in self:
2990 for rev in self:
2982 node = self.node(rev)
2991 node = self.node(rev)
2983
2992
2984 # Verify contents. 4 cases to care about:
2993 # Verify contents. 4 cases to care about:
2985 #
2994 #
2986 # common: the most common case
2995 # common: the most common case
2987 # rename: with a rename
2996 # rename: with a rename
2988 # meta: file content starts with b'\1\n', the metadata
2997 # meta: file content starts with b'\1\n', the metadata
2989 # header defined in filelog.py, but without a rename
2998 # header defined in filelog.py, but without a rename
2990 # ext: content stored externally
2999 # ext: content stored externally
2991 #
3000 #
2992 # More formally, their differences are shown below:
3001 # More formally, their differences are shown below:
2993 #
3002 #
2994 # | common | rename | meta | ext
3003 # | common | rename | meta | ext
2995 # -------------------------------------------------------
3004 # -------------------------------------------------------
2996 # flags() | 0 | 0 | 0 | not 0
3005 # flags() | 0 | 0 | 0 | not 0
2997 # renamed() | False | True | False | ?
3006 # renamed() | False | True | False | ?
2998 # rawtext[0:2]=='\1\n'| False | True | True | ?
3007 # rawtext[0:2]=='\1\n'| False | True | True | ?
2999 #
3008 #
3000 # "rawtext" means the raw text stored in revlog data, which
3009 # "rawtext" means the raw text stored in revlog data, which
3001 # could be retrieved by "rawdata(rev)". "text"
3010 # could be retrieved by "rawdata(rev)". "text"
3002 # mentioned below is "revision(rev)".
3011 # mentioned below is "revision(rev)".
3003 #
3012 #
3004 # There are 3 different lengths stored physically:
3013 # There are 3 different lengths stored physically:
3005 # 1. L1: rawsize, stored in revlog index
3014 # 1. L1: rawsize, stored in revlog index
3006 # 2. L2: len(rawtext), stored in revlog data
3015 # 2. L2: len(rawtext), stored in revlog data
3007 # 3. L3: len(text), stored in revlog data if flags==0, or
3016 # 3. L3: len(text), stored in revlog data if flags==0, or
3008 # possibly somewhere else if flags!=0
3017 # possibly somewhere else if flags!=0
3009 #
3018 #
3010 # L1 should be equal to L2. L3 could be different from them.
3019 # L1 should be equal to L2. L3 could be different from them.
3011 # "text" may or may not affect commit hash depending on flag
3020 # "text" may or may not affect commit hash depending on flag
3012 # processors (see flagutil.addflagprocessor).
3021 # processors (see flagutil.addflagprocessor).
3013 #
3022 #
3014 # | common | rename | meta | ext
3023 # | common | rename | meta | ext
3015 # -------------------------------------------------
3024 # -------------------------------------------------
3016 # rawsize() | L1 | L1 | L1 | L1
3025 # rawsize() | L1 | L1 | L1 | L1
3017 # size() | L1 | L2-LM | L1(*) | L1 (?)
3026 # size() | L1 | L2-LM | L1(*) | L1 (?)
3018 # len(rawtext) | L2 | L2 | L2 | L2
3027 # len(rawtext) | L2 | L2 | L2 | L2
3019 # len(text) | L2 | L2 | L2 | L3
3028 # len(text) | L2 | L2 | L2 | L3
3020 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3029 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3021 #
3030 #
3022 # LM: length of metadata, depending on rawtext
3031 # LM: length of metadata, depending on rawtext
3023 # (*): not ideal, see comment in filelog.size
3032 # (*): not ideal, see comment in filelog.size
3024 # (?): could be "- len(meta)" if the resolved content has
3033 # (?): could be "- len(meta)" if the resolved content has
3025 # rename metadata
3034 # rename metadata
3026 #
3035 #
3027 # Checks needed to be done:
3036 # Checks needed to be done:
3028 # 1. length check: L1 == L2, in all cases.
3037 # 1. length check: L1 == L2, in all cases.
3029 # 2. hash check: depending on flag processor, we may need to
3038 # 2. hash check: depending on flag processor, we may need to
3030 # use either "text" (external), or "rawtext" (in revlog).
3039 # use either "text" (external), or "rawtext" (in revlog).
3031
3040
3032 try:
3041 try:
3033 skipflags = state.get(b'skipflags', 0)
3042 skipflags = state.get(b'skipflags', 0)
3034 if skipflags:
3043 if skipflags:
3035 skipflags &= self.flags(rev)
3044 skipflags &= self.flags(rev)
3036
3045
3037 _verify_revision(self, skipflags, state, node)
3046 _verify_revision(self, skipflags, state, node)
3038
3047
3039 l1 = self.rawsize(rev)
3048 l1 = self.rawsize(rev)
3040 l2 = len(self.rawdata(node))
3049 l2 = len(self.rawdata(node))
3041
3050
3042 if l1 != l2:
3051 if l1 != l2:
3043 yield revlogproblem(
3052 yield revlogproblem(
3044 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3053 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3045 node=node,
3054 node=node,
3046 )
3055 )
3047
3056
3048 except error.CensoredNodeError:
3057 except error.CensoredNodeError:
3049 if state[b'erroroncensored']:
3058 if state[b'erroroncensored']:
3050 yield revlogproblem(
3059 yield revlogproblem(
3051 error=_(b'censored file data'), node=node
3060 error=_(b'censored file data'), node=node
3052 )
3061 )
3053 state[b'skipread'].add(node)
3062 state[b'skipread'].add(node)
3054 except Exception as e:
3063 except Exception as e:
3055 yield revlogproblem(
3064 yield revlogproblem(
3056 error=_(b'unpacking %s: %s')
3065 error=_(b'unpacking %s: %s')
3057 % (short(node), stringutil.forcebytestr(e)),
3066 % (short(node), stringutil.forcebytestr(e)),
3058 node=node,
3067 node=node,
3059 )
3068 )
3060 state[b'skipread'].add(node)
3069 state[b'skipread'].add(node)
3061
3070
3062 def storageinfo(
3071 def storageinfo(
3063 self,
3072 self,
3064 exclusivefiles=False,
3073 exclusivefiles=False,
3065 sharedfiles=False,
3074 sharedfiles=False,
3066 revisionscount=False,
3075 revisionscount=False,
3067 trackedsize=False,
3076 trackedsize=False,
3068 storedsize=False,
3077 storedsize=False,
3069 ):
3078 ):
3070 d = {}
3079 d = {}
3071
3080
3072 if exclusivefiles:
3081 if exclusivefiles:
3073 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3082 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3074 if not self._inline:
3083 if not self._inline:
3075 d[b'exclusivefiles'].append((self.opener, self.datafile))
3084 d[b'exclusivefiles'].append((self.opener, self.datafile))
3076
3085
3077 if sharedfiles:
3086 if sharedfiles:
3078 d[b'sharedfiles'] = []
3087 d[b'sharedfiles'] = []
3079
3088
3080 if revisionscount:
3089 if revisionscount:
3081 d[b'revisionscount'] = len(self)
3090 d[b'revisionscount'] = len(self)
3082
3091
3083 if trackedsize:
3092 if trackedsize:
3084 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3093 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3085
3094
3086 if storedsize:
3095 if storedsize:
3087 d[b'storedsize'] = sum(
3096 d[b'storedsize'] = sum(
3088 self.opener.stat(path).st_size for path in self.files()
3097 self.opener.stat(path).st_size for path in self.files()
3089 )
3098 )
3090
3099
3091 return d
3100 return d
3092
3101
3093 def rewrite_sidedata(self, helpers, startrev, endrev):
3102 def rewrite_sidedata(self, helpers, startrev, endrev):
3094 if not self.hassidedata:
3103 if not self.hassidedata:
3095 return
3104 return
3096 # inline are not yet supported because they suffer from an issue when
3105 # inline are not yet supported because they suffer from an issue when
3097 # rewriting them (since it's not an append-only operation).
3106 # rewriting them (since it's not an append-only operation).
3098 # See issue6485.
3107 # See issue6485.
3099 assert not self._inline
3108 assert not self._inline
3100 if not helpers[1] and not helpers[2]:
3109 if not helpers[1] and not helpers[2]:
3101 # Nothing to generate or remove
3110 # Nothing to generate or remove
3102 return
3111 return
3103
3112
3104 # changelog implement some "delayed" writing mechanism that assume that
3113 # changelog implement some "delayed" writing mechanism that assume that
3105 # all index data is writen in append mode and is therefor incompatible
3114 # all index data is writen in append mode and is therefor incompatible
3106 # with the seeked write done in this method. The use of such "delayed"
3115 # with the seeked write done in this method. The use of such "delayed"
3107 # writing will soon be removed for revlog version that support side
3116 # writing will soon be removed for revlog version that support side
3108 # data, so for now, we only keep this simple assert to highlight the
3117 # data, so for now, we only keep this simple assert to highlight the
3109 # situation.
3118 # situation.
3110 delayed = getattr(self, '_delayed', False)
3119 delayed = getattr(self, '_delayed', False)
3111 diverted = getattr(self, '_divert', False)
3120 diverted = getattr(self, '_divert', False)
3112 if delayed and not diverted:
3121 if delayed and not diverted:
3113 msg = "cannot rewrite_sidedata of a delayed revlog"
3122 msg = "cannot rewrite_sidedata of a delayed revlog"
3114 raise error.ProgrammingError(msg)
3123 raise error.ProgrammingError(msg)
3115
3124
3116 new_entries = []
3125 new_entries = []
3117 # append the new sidedata
3126 # append the new sidedata
3118 with self._datafp(b'a+') as fp:
3127 with self._datafp(b'a+') as fp:
3119 # Maybe this bug still exists, see revlog._writeentry
3128 # Maybe this bug still exists, see revlog._writeentry
3120 fp.seek(0, os.SEEK_END)
3129 fp.seek(0, os.SEEK_END)
3121 current_offset = fp.tell()
3130 current_offset = fp.tell()
3122 for rev in range(startrev, endrev + 1):
3131 for rev in range(startrev, endrev + 1):
3123 entry = self.index[rev]
3132 entry = self.index[rev]
3124 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3133 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3125 store=self,
3134 store=self,
3126 sidedata_helpers=helpers,
3135 sidedata_helpers=helpers,
3127 sidedata={},
3136 sidedata={},
3128 rev=rev,
3137 rev=rev,
3129 )
3138 )
3130
3139
3131 serialized_sidedata = sidedatautil.serialize_sidedata(
3140 serialized_sidedata = sidedatautil.serialize_sidedata(
3132 new_sidedata
3141 new_sidedata
3133 )
3142 )
3134 if entry[8] != 0 or entry[9] != 0:
3143 if entry[8] != 0 or entry[9] != 0:
3135 # rewriting entries that already have sidedata is not
3144 # rewriting entries that already have sidedata is not
3136 # supported yet, because it introduces garbage data in the
3145 # supported yet, because it introduces garbage data in the
3137 # revlog.
3146 # revlog.
3138 msg = b"Rewriting existing sidedata is not supported yet"
3147 msg = b"Rewriting existing sidedata is not supported yet"
3139 raise error.Abort(msg)
3148 raise error.Abort(msg)
3140
3149
3141 # Apply (potential) flags to add and to remove after running
3150 # Apply (potential) flags to add and to remove after running
3142 # the sidedata helpers
3151 # the sidedata helpers
3143 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3152 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3144 entry = (new_offset_flags,) + entry[1:8]
3153 entry = (new_offset_flags,) + entry[1:8]
3145 entry += (current_offset, len(serialized_sidedata))
3154 entry += (current_offset, len(serialized_sidedata))
3146
3155
3147 fp.write(serialized_sidedata)
3156 fp.write(serialized_sidedata)
3148 new_entries.append(entry)
3157 new_entries.append(entry)
3149 current_offset += len(serialized_sidedata)
3158 current_offset += len(serialized_sidedata)
3150
3159
3151 # rewrite the new index entries
3160 # rewrite the new index entries
3152 with self._indexfp(b'r+') as fp:
3161 with self._indexfp(b'r+') as fp:
3153 fp.seek(startrev * self.index.entry_size)
3162 fp.seek(startrev * self.index.entry_size)
3154 for i, e in enumerate(new_entries):
3163 for i, e in enumerate(new_entries):
3155 rev = startrev + i
3164 rev = startrev + i
3156 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3165 self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
3157 packed = self.index.entry_binary(rev)
3166 packed = self.index.entry_binary(rev)
3158 if rev == 0:
3167 if rev == 0:
3159 header = self._format_flags | self._format_version
3168 header = self._format_flags | self._format_version
3160 header = self.index.pack_header(header)
3169 header = self.index.pack_header(header)
3161 packed = header + packed
3170 packed = header + packed
3162 fp.write(packed)
3171 fp.write(packed)
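
    # Note the two-phase structure of rewrite_sidedata() above: new sidedata
    # blobs are appended to the data file first, and only then are the
    # corresponding index entries patched in place through a seeked b'r+'
    # write, which is exactly the access pattern the "delayed" changelog
    # writing cannot tolerate.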