revlog: use file read caching for sidedata...
Simon Sapin
r48219:cac0e062 default
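The patch threads the changelog's swappable opener through to the new sidedata segment file, so sidedata reads go through the same cached file handles as index and data reads, and respect delayed or diverted writes during a transaction. The sketch below illustrates the underlying pattern only; the class name and signatures are hypothetical, not Mercurial's actual `randomaccessfile` API. A segment file keeps one cached read handle, obtained from an `opener` attribute that owning code can reassign (as `delayupdate`, `_writepending`, and `_finalize` do in the diff below).

# Hypothetical sketch of a segment file with read caching; illustrative only,
# not the real mercurial.revlogutils.randomaccessfile API.
class CachedSegmentFile(object):
    def __init__(self, opener, filename):
        self.opener = opener  # swappable, like _segmentfile_sidedata.opener
        self.filename = filename
        self._handle = None

    def read_chunk(self, offset, length):
        # Reuse one open handle across reads instead of reopening the file.
        # In this sketch, code that swaps self.opener must also drop the
        # cached handle so later reads go through the new opener.
        if self._handle is None:
            self._handle = self.opener(self.filename, b'rb')
        self._handle.seek(offset)
        return self._handle.read(length)

    def close(self):
        if self._handle is not None:
            self._handle.close()
            self._handle = None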
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,630 +1,633 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import (
    constants as revlog_constants,
    flagutil,
)

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    """the changelog index must be updated last on disk, so we use this class
    to delay writes to it"""

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)


class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay
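# A hypothetical usage sketch for the helpers above: while a transaction is
# open, the changelog swaps its opener so appends go to an in-memory buffer
# (via `appender`) or to a diverted `.a` file (via `_divertopener`); readers
# of the pristine index never see partially-committed data. `vfs` stands in
# for a Mercurial vfs object.
def _sketch_delayed_append(vfs, buf):
    opener = _delayopener(vfs, b'00changelog.i', buf)
    fp = opener(b'00changelog.i', b'a+b')  # an `appender`, not a plain file
    fp.write(b'pending index entry')  # appended to `buf`, not to disk
    fp.seek(0)
    return fp.read(4096)  # reads span the on-disk bytes, then the buffer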


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib()
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
    branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, cl, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])

    @property
    def branchinfo(self):
        extra = self.extra
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra
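    # A hypothetical raw entry in the text format parsed above (node, user,
    # and date are made up for illustration):
    #
    #     _example = (
    #         b"0123456789abcdef0123456789abcdef01234567\n"
    #         b"Jane Doe <jane@example.com>\n"
    #         b"1500000000 0 branch:stable\n"
    #         b"file_a.txt\nfile_b.txt\n"
    #         b"\n"
    #         b"commit message text"
    #     )
    #
    # Parsing this text yields manifest == bin(first line), the user line,
    # a date of (1500000000.0, 0), extra {'branch': 'stable'}, files
    # ['file_a.txt', 'file_b.txt'], and description b'commit message text'.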


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False, concurrencychecker=None):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.

        ``concurrencychecker`` will be passed to the revlog init function, see
        the documentation there.
        """
        revlog.revlog.__init__(
            self,
            opener,
            target=(revlog_constants.KIND_CHANGELOG, None),
            radix=b'00changelog',
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
            concurrencychecker=concurrencychecker,
            trypending=trypending,
        )

        if self._initempty and (self._format_version == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self._format_flags &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def _write_docket(self, tr):
        if not self._delayed:
            super(changelog, self)._write_docket(tr)

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""
        if self._docket is None and not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self._indexfile + b'.a'):
                    self._realopener.unlink(self._indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self._indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self._indexfile, self._delaybuf
                )
            self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        self._segmentfile.opener = self.opener
+        self._segmentfile_sidedata.opener = self.opener
        # move redirected index data back into place
        if self._docket is not None:
            self._write_docket(tr)
        elif self._divert:
            assert not self._delaybuf
            tmpname = self._indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self._indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self._indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._docket:
            return self._docket.write(tr, pending=True)
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self._indexfile)
            pendingfilename = self._indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self._indexfile)
            self._segmentfile.opener = self.opener
+            self._segmentfile_sidedata.opener = self.opener

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr)
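    # A hypothetical driver showing the delayed-write lifecycle above; in
    # real use the transaction invokes _writepending/_finalize through the
    # tr.addpending/tr.addfinalize hooks registered by delayupdate, so they
    # are spelled out here only to make the ordering visible (`files` would
    # be a metadata.ChangingFiles instance):
    #
    #     cl.delayupdate(tr)    # swap the opener on the revlog and on both
    #                           # segment files (index/data and sidedata)
    #     cl.add(manifest, files, desc, tr, p1, p2, user)
    #     cl._writepending(tr)  # expose pending data as 00changelog.i.a so
    #                           # pretxn hooks can read it
    #     cl._finalize(tr)      # restore the real opener everywhere and
    #                           # move the redirected data back into place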

    def read(self, nodeorrev):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        copy_sd = self._copiesstorage == b'changeset-sidedata'
        c = changelogrevision(self, d, sidedata, copy_sd)
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        return changelogrevision(
            self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, nodeorrev):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(nodeorrev)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            if files.has_copies_info:
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        rev = self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )
        return self.node(rev)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        return self.changelogrevision(rev).branchinfo

    def _nodeduplicatecallback(self, transaction, rev):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(rev)
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,3298 +1,3299 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)
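# A hypothetical driver for the hook point above: verify one node, skipping
# the expensive read-and-hash check when flags we cannot process are present.
def _sketch_verify(rl, node, unknown_flags):
    state = {b'skipread': set()}
    _verify_revision(rl, unknown_flags, state, node)
    return node in state[b'skipread']  # True when the read was skipped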


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache
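# A hypothetical dispatch sketch: the real selection happens in _loadindex()
# (outside this hunk) and also weighs the nodemap and rust variants above,
# but conceptually a parser is chosen from the format version like this:
def _sketch_pick_index_parser(format_version):
    if format_version == REVLOGV2:
        return parse_index_v2
    if format_version == CHANGELOGV2:
        return parse_index_cl_v2
    return parse_index_v1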


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """
286
286
287 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
288
288
289 def __init__(
289 def __init__(
290 self,
290 self,
291 opener,
291 opener,
292 target,
292 target,
293 radix,
293 radix,
294 postfix=None, # only exist for `tmpcensored` now
294 postfix=None, # only exist for `tmpcensored` now
295 checkambig=False,
295 checkambig=False,
296 mmaplargeindex=False,
296 mmaplargeindex=False,
297 censorable=False,
297 censorable=False,
298 upperboundcomp=None,
298 upperboundcomp=None,
299 persistentnodemap=False,
299 persistentnodemap=False,
300 concurrencychecker=None,
300 concurrencychecker=None,
301 trypending=False,
301 trypending=False,
302 ):
302 ):
303 """
303 """
304 create a revlog object
304 create a revlog object
305
305
306 opener is a function that abstracts the file opening operation
306 opener is a function that abstracts the file opening operation
307 and can be used to implement COW semantics or the like.
307 and can be used to implement COW semantics or the like.
308
308
309 `target`: a (KIND, ID) tuple that identify the content stored in
309 `target`: a (KIND, ID) tuple that identify the content stored in
310 this revlog. It help the rest of the code to understand what the revlog
310 this revlog. It help the rest of the code to understand what the revlog
311 is about without having to resort to heuristic and index filename
311 is about without having to resort to heuristic and index filename
312 analysis. Note: that this must be reliably be set by normal code, but
312 analysis. Note: that this must be reliably be set by normal code, but
313 that test, debug, or performance measurement code might not set this to
313 that test, debug, or performance measurement code might not set this to
314 accurate value.
314 accurate value.
315 """
315 """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * new_header:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

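    # Side note on the cache-size validation above: for a positive integer
    # n, the expression ``n & (n - 1)`` clears the lowest set bit, so it is
    # zero exactly when n is a power of two. A tiny illustration (for
    # exposition only, not part of the revlog API):
    #
    #   >>> [n for n in (1, 2, 3, 4, 6, 8) if not (n & (n - 1))]
    #   [1, 2, 4, 8]
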
    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

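    # How _get_data() decides between a plain read and mmap, in short
    # (exposition only; the behavior is defined by the code above):
    #
    #   mmap_threshold is None        -> plain read()
    #   file_size < mmap_threshold    -> plain read()
    #   file_size >= mmap_threshold   -> mmap, clamped to `size` when given
    #   file missing (ENOENT)         -> b''
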
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

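            # A sketch of the header split above (exposition only): the low
            # 16 bits carry the format version, the higher bits carry the
            # feature flags. For instance a v1 inline revlog stores
            # 0x00010001 == REVLOGV1 | FLAG_INLINE_DATA, which splits as:
            #
            #   >>> header = 0x00010001
            #   >>> hex(header & 0xFFFF), hex(header & ~0xFFFF)
            #   ('0x1', '0x10000')
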
            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self._indexfile if self._inline else self._datafile),
            self._chunkcachesize,
            chunkcache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self._sidedatafile,
            self._chunkcachesize,
        )
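        # Note: mirroring the main data file, sidedata reads now go through
        # a randomaccessfile wrapper so chunks are cached instead of the
        # file being reopened for every access. Conceptually the accessor
        # pattern is something like (exposition only):
        #
        #   data = self._segmentfile_sidedata.read_chunk(offset, length)
        #
        # where repeated nearby reads are served from the cached buffer.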
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp

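    # Reading through the active write handle above matters during a
    # transaction: data appended by the in-progress write may not be
    # visible through a freshly opened file yet, so readers must share the
    # writer's handle (the third entry of the _writinghandles 3-tuple).
    # A sketch of the calling pattern (exposition only; names are generic
    # file-object APIs, not revlog-specific):
    #
    #   with self._sidedatareadfp() as fp:
    #       fp.seek(sidedata_offset)
    #       segment = fp.read(sidedata_size)
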
    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processors can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have the same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

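    # The first index tuple entry packs a 48-bit byte offset and 16 bits of
    # flags into one integer, hence the shift above and the mask in flags()
    # below. A small illustration (exposition only, with made-up values):
    #
    #   >>> packed = (1234 << 16) | 0x0002
    #   >>> packed >> 16, packed & 0xFFFF
    #   (1234, 2)
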
    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this "sidedata -> 0 sidedata_offset" policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

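    # Worked example for the fallback scan above (exposition only, with
    # hypothetical offsets): if rev 5 has no sidedata, its stored offset is
    # 0 rather than "end of rev 4's sidedata". Walking back to the nearest
    # rev with a non-zero sidedata length, say rev 3 with offset 800 and
    # size 200, yields a cut-off of 800 + 200 == 1000.
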
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

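    # The XOR in the fast path above removes ELLIPSIS from the known-flags
    # mask before testing, so an ellipsis-only revision still takes the
    # cheap rawsize() route. Schematically (exposition only, with made-up
    # bit values):
    #
    #   >>> KNOWN, ELLIPSIS = 0b111, 0b010
    #   >>> flags = 0b010
    #   >>> flags & (KNOWN ^ ELLIPSIS) == 0
    #   True
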
    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

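    # _chaininfo() memoizes per-rev (chain-length, compressed-delta-size)
    # pairs and can stop early when the walk reaches an already-cached rev,
    # adding that rev's cached totals instead of continuing to the base.
    # Rough shape of the recurrence it computes (exposition only):
    #
    #   clen(base) = 0
    #   clen(rev)  = 1 + clen(deltaparent(rev))
    #   size(rev)  = deltalen(rev) + size(deltaparent(rev))
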
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

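    # Delta-chain walk in a nutshell: with generaldelta each entry names
    # its delta parent explicitly (e[3]); without it the delta parent is
    # always rev - 1. A hypothetical chain where rev 7 deltas against 4,
    # and 4 against 2 (its own base), would yield (exposition only):
    #
    #   _deltachain(7)            -> ([2, 4, 7], False)
    #   _deltachain(7, stoprev=4) -> ([7], True)
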
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

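    # The lazyset above exists so membership tests against "ancestors of
    # common" can start before the lazy ancestor generator is exhausted,
    # while still allowing nullrev and common itself to be added eagerly.
    # Usage shape (exposition only):
    #
    #   has = lazyset(self.ancestors(common))   # lazy bulk of the set
    #   has.add(nullrev)                        # eager extras
    #   nullrev in has                          # True, checked eagerly
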
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
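        # Hypothetical shape of the result (exposition only): with a linear
        # history A -> B -> C -> D and roots=[B], heads=[D], nodesbetween
        # returns ([B, C, D], [B], [D]).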
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
1300 # We're trying to figure out which heads are reachable
1295 # from roots.
1301 # from roots.
1296 # Mark this head as having been reached
1302 # Mark this head as having been reached
1297 heads[n] = True
1303 heads[n] = True
1298 elif ancestors is None:
1304 elif ancestors is None:
1299 # Otherwise, we're trying to discover the heads.
1305 # Otherwise, we're trying to discover the heads.
1300 # Assume this is a head because if it isn't, the next step
1306 # Assume this is a head because if it isn't, the next step
1301 # will eventually remove it.
1307 # will eventually remove it.
1302 heads[n] = True
1308 heads[n] = True
1303 # But, obviously its parents aren't.
1309 # But, obviously its parents aren't.
1304 for p in self.parents(n):
1310 for p in self.parents(n):
1305 heads.pop(p, None)
1311 heads.pop(p, None)
1306 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1312 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1307 roots = list(roots)
1313 roots = list(roots)
1308 assert orderedout
1314 assert orderedout
1309 assert roots
1315 assert roots
1310 assert heads
1316 assert heads
1311 return (orderedout, roots, heads)
1317 return (orderedout, roots, heads)
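
A minimal standalone sketch of the marking pass above, using a toy DAG given as {rev: (p1, p2)} with -1 as the null parent and revision numbers already in topological order. All names here are illustrative, not Mercurial API:

# Sketch: walk from the heads toward the roots, then keep only nodes that
# are also reachable from the roots, mirroring nodesbetween's two phases.
def between(parents, roots, heads):
    lowest = min(roots)
    ancestors = set()
    totag = set(heads)
    while totag:
        n = totag.pop()
        if n < lowest or n in ancestors:
            continue
        ancestors.add(n)
        totag.update(p for p in parents[n] if p != -1)
    out = []
    descendants = set(roots)
    for r in sorted(ancestors):
        if r in roots or any(p in descendants for p in parents[r]):
            descendants.add(r)
            out.append(r)
    return out

parents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1), 4: (2, 3)}
print(between(parents, roots={1}, heads={4}))  # [1, 2, 3, 4]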

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
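
A standalone sketch of the ishead trick used by _headrevs above: every revision starts as a candidate head and is cleared as soon as it is seen as a parent. The toy table maps rev to (p1, p2), with -1 (nullrev) meaning no parent; the extra slot at the end of the array absorbs the -1 writes, just as in the real code:

def headrevs(parentrevs):
    count = len(parentrevs)
    ishead = [0] * (count + 1)  # sentinel slot absorbs writes at index -1
    for r in range(count):
        ishead[r] = 1  # r may be a head
        p1, p2 = parentrevs[r]
        ishead[p1] = ishead[p2] = 0  # its parents are not
    return [r for r, val in enumerate(ishead[:count]) if val]

print(headrevs([(-1, -1), (0, -1), (0, -1)]))  # [1, 2]: two branches off rev 0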

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid
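
A small sketch of the tie-break in ancestor() above: when several common-ancestor heads exist (a criss-cross merge), taking min() over the binary node ids gives a deterministic winner that does not depend on traversal order. The 20-byte toy ids below stand in for real node hashes:

candidates = [b'\x9f' + b'\x00' * 19, b'\x3a' + b'\x00' * 19]
best = min(candidates)  # bytes compare lexicographically
assert best == candidates[1]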

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

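
A standalone sketch of the _match cascade above: an identifier is tried as a revision number, then a binary nodeid, then a decimal string (possibly negative), then a full hex nodeid. The 4-byte toy ids and the resolve() helper are illustrative only:

import binascii

NODELEN = 4  # toy ids; real revlogs use 20- or 32-byte hashes
nodes = [binascii.unhexlify(b'deadbeef'), binascii.unhexlify(b'cafebabe')]

def resolve(ident):
    if isinstance(ident, int):  # rev number
        return nodes[ident]
    if len(ident) == NODELEN and ident in nodes:  # binary nodeid
        return ident
    try:  # str(rev), possibly negative
        rev = int(ident.decode('ascii'))
        if b'%d' % rev == ident and -len(nodes) <= rev < len(nodes):
            return nodes[rev]
    except (ValueError, UnicodeDecodeError):
        pass
    if len(ident) == 2 * NODELEN:  # full hex nodeid
        try:
            node = binascii.unhexlify(ident)
            if node in nodes:
                return node
        except binascii.Error:
            pass
    return None

assert resolve(0) == nodes[0]
assert resolve(b'-1') == nodes[1]
assert resolve(b'cafebabe') == nodes[1]
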
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

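
A sketch of the slow-path prefix scan in _partialmatch above: grab an even number of hex digits, narrow candidates by binary prefix, then confirm against the full hex form. The toy hex ids are illustrative:

import binascii

nodes = [binascii.unhexlify(h) for h in (b'deadbeef', b'dead00ff', b'cafebabe')]

def candidates(hexprefix):
    l = len(hexprefix) // 2  # grab an even number of digits
    binprefix = binascii.unhexlify(hexprefix[: l * 2])
    near = [n for n in nodes if n.startswith(binprefix)]
    return [n for n in near if binascii.hexlify(n).startswith(hexprefix)]

assert len(candidates(b'dead')) == 2   # ambiguous
assert len(candidates(b'deadb')) == 1  # unique
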
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

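
A standalone sketch of the pure fallback in shortest() above: try prefixes of increasing length until exactly one known hex id matches, then lengthen past any all-'f' prefix so it cannot be confused with the wdir pseudo-id. The hexids list is illustrative:

hexids = [b'deadbeef', b'dead00ff', b'ffcc0011']

def shortest(hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        matches = [h for h in hexids if h.startswith(prefix)]
        ambiguous_with_wdir = all(c == ord(b'f') for c in prefix)
        if len(matches) == 1 and not ambiguous_with_wdir:
            return prefix

assert shortest(b'deadbeef') == b'deadb'
assert shortest(b'ffcc0011') == b'ffc'  # b'ff' alone could mean wdir
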
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If used, its seek position will
        not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length, df)

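
A sketch of the inline-offset arithmetic above: in an inline revlog the index entries and data chunks are interleaved in one file, so a logical data offset must be shifted by one index entry per revision already seen (plus one for the entry of the revision itself). The entry size below is illustrative:

ENTRY_SIZE = 64  # bytes per index entry (assumed, for illustration)

def physical_span(start, end, startrev, endrev, inline):
    if inline:
        start += (startrev + 1) * ENTRY_SIZE
        end += (endrev + 1) * ENTRY_SIZE
    return start, end - start

# data for revs 0..1 that logically spans bytes [0, 300) of the data stream:
print(physical_span(0, 300, 0, 1, inline=True))  # (64, 364)
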
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in
            # that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

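
A standalone sketch of the per-chunk dispatch above: each index entry records how its chunk is compressed, so decompression is chosen per revision. zlib stands in for the revlog's configured compressor, and the mode constants are illustrative, not the on-disk values:

import zlib

MODE_PLAIN, MODE_DEFAULT = 0, 1

def decode_chunk(raw, mode):
    if mode == MODE_PLAIN:
        return raw
    elif mode == MODE_DEFAULT:
        return zlib.decompress(raw)
    raise ValueError('unknown compression mode %d' % mode)

payload = b'x' * 100
assert decode_chunk(payload, MODE_PLAIN) == payload
assert decode_chunk(zlib.compress(payload), MODE_DEFAULT) == payload
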
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

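
A sketch of the issnapshot() recursion above for sparse revlogs: an entry whose delta base is itself or nullrev is a snapshot, a delta against a parent is not, and an intermediate snapshot is one whose base chain ends in snapshots. The toy entries below are (base, p1, p2):

NULLREV = -1

def issnapshot(entries, rev):
    if rev == NULLREV:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == NULLREV:
        return True
    if base in (p1, p2):
        return False  # plain delta against a parent
    return issnapshot(entries, base)

entries = [
    (0, NULLREV, NULLREV),  # rev 0: full snapshot (base == rev)
    (0, 0, NULLREV),        # rev 1: delta against its parent 0
    (0, 1, NULLREV),        # rev 2: delta against 0, which is not a parent
]
assert [issnapshot(entries, r) for r in range(3)] == [True, False, True]
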
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

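
A standalone sketch of the delta-chain reconstruction above: the first chunk of the chain is a full text, every later chunk is a patch, and a cached intermediate text lets the chain be cut short. Patches here are simple (start, end, data) splices rather than mdiff's binary format:

def patches(base, bins):
    text = base
    for start, end, data in bins:
        text = text[:start] + data + text[end:]
    return text

base = b'abcdef'
chain = [(1, 3, b'XY'), (0, 1, b'')]  # two successive deltas
assert patches(base, chain) == b'XYdef'
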
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

-        # XXX this need caching, as we do for data
-        with self._sidedatareadfp() as sdf:
-            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
-                filename = self._sidedatafile
-                end = self._docket.sidedata_end
-                offset = sidedata_offset
-                length = sidedata_size
-                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
-                raise error.RevlogError(m)
-
-            sdf.seek(sidedata_offset, os.SEEK_SET)
-            comp_segment = sdf.read(sidedata_size)
-
-            if len(comp_segment) < sidedata_size:
-                filename = self._sidedatafile
-                length = sidedata_size
-                offset = sidedata_offset
-                got = len(comp_segment)
-                m = randomaccessfile.PARTIAL_READ_MSG % (
-                    filename,
-                    length,
-                    offset,
-                    got,
-                )
-                raise error.RevlogError(m)
+        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
+            filename = self._sidedatafile
+            end = self._docket.sidedata_end
+            offset = sidedata_offset
+            length = sidedata_size
+            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
+            raise error.RevlogError(m)
+
+        comp_segment = self._segmentfile_sidedata.read_chunk(
+            sidedata_offset, sidedata_size
+        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

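
A minimal standalone sketch of what the hunk above buys: routing sidedata reads through the same kind of chunk-caching reader as revision data, so nearby reads hit an in-memory window instead of paying a seek plus read per call. This mimics mercurial.revlogutils.randomaccessfile in spirit only; class and parameter names here are illustrative:

class CachedReader:
    def __init__(self, path, window=65536):
        self._path = path
        self._window = window  # read ahead this much on a cache miss
        self._cache_start = 0
        self._cache = b''

    def read_chunk(self, offset, length):
        end = offset + length
        if not (self._cache_start <= offset
                and end <= self._cache_start + len(self._cache)):
            # cache miss: reopen lazily, like the real reader, and refill
            with open(self._path, 'rb') as f:
                f.seek(offset)
                self._cache = f.read(max(length, self._window))
                self._cache_start = offset
        rel = offset - self._cache_start
        return self._cache[rel:rel + length]

# usage (path is hypothetical): CachedReader('store.sd').read_chunk(0, 512)
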
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
+            # No need to deal with sidedata writing handle as it is only
+            # relevant with revlog-v2 which is never inline, not reaching
+            # this code

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self._chunkcachesize,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
+                # No need to deal with sidedata writing handle as it is only
+                # relevant with revlog-v2 which is never inline, not reaching
+                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

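
A sketch of the inline-size policy above: an inline revlog keeps index and data in one file until the total data size crosses a threshold, then is split into separate index and data files. The threshold constant below is illustrative; hg's _maxinline is of a similar order of magnitude:

MAX_INLINE = 131072  # assumed threshold, for illustration only

def should_split(inline, chunk_sizes):
    return inline and sum(chunk_sizes) >= MAX_INLINE

assert should_split(True, [100000, 40000])
assert not should_split(False, [10 ** 9])  # already split: nothing to do
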
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

2090 @contextlib.contextmanager
2088 @contextlib.contextmanager
2091 def _writing(self, transaction):
2089 def _writing(self, transaction):
2092 if self._trypending:
2090 if self._trypending:
2093 msg = b'try to write in a `trypending` revlog: %s'
2091 msg = b'try to write in a `trypending` revlog: %s'
2094 msg %= self.display_id
2092 msg %= self.display_id
2095 raise error.ProgrammingError(msg)
2093 raise error.ProgrammingError(msg)
2096 if self._writinghandles is not None:
2094 if self._writinghandles is not None:
2097 yield
2095 yield
2098 else:
2096 else:
2099 ifh = dfh = sdfh = None
2097 ifh = dfh = sdfh = None
2100 try:
2098 try:
2101 r = len(self)
2099 r = len(self)
2102 # opening the data file.
2100 # opening the data file.
2103 dsize = 0
2101 dsize = 0
2104 if r:
2102 if r:
2105 dsize = self.end(r - 1)
2103 dsize = self.end(r - 1)
2106 dfh = None
2104 dfh = None
2107 if not self._inline:
2105 if not self._inline:
2108 try:
2106 try:
2109 dfh = self._datafp(b"r+")
2107 dfh = self._datafp(b"r+")
2110 if self._docket is None:
2108 if self._docket is None:
2111 dfh.seek(0, os.SEEK_END)
2109 dfh.seek(0, os.SEEK_END)
2112 else:
2110 else:
2113 dfh.seek(self._docket.data_end, os.SEEK_SET)
2111 dfh.seek(self._docket.data_end, os.SEEK_SET)
2114 except IOError as inst:
2112 except IOError as inst:
2115 if inst.errno != errno.ENOENT:
2113 if inst.errno != errno.ENOENT:
2116 raise
2114 raise
2117 dfh = self._datafp(b"w+")
2115 dfh = self._datafp(b"w+")
2118 transaction.add(self._datafile, dsize)
2116 transaction.add(self._datafile, dsize)
2119 if self._sidedatafile is not None:
2117 if self._sidedatafile is not None:
2120 try:
2118 try:
2121 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2119 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2122 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2120 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2123 except IOError as inst:
2121 except IOError as inst:
2124 if inst.errno != errno.ENOENT:
2122 if inst.errno != errno.ENOENT:
2125 raise
2123 raise
2126 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2124 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2127 transaction.add(
2125 transaction.add(
2128 self._sidedatafile, self._docket.sidedata_end
2126 self._sidedatafile, self._docket.sidedata_end
2129 )
2127 )
2130
2128
2131 # opening the index file.
2129 # opening the index file.
2132 isize = r * self.index.entry_size
2130 isize = r * self.index.entry_size
2133 ifh = self.__index_write_fp()
2131 ifh = self.__index_write_fp()
2134 if self._inline:
2132 if self._inline:
2135 transaction.add(self._indexfile, dsize + isize)
2133 transaction.add(self._indexfile, dsize + isize)
2136 else:
2134 else:
2137 transaction.add(self._indexfile, isize)
2135 transaction.add(self._indexfile, isize)
2138 # exposing all file handle for writing.
2136 # exposing all file handle for writing.
2139 self._writinghandles = (ifh, dfh, sdfh)
2137 self._writinghandles = (ifh, dfh, sdfh)
2140 self._segmentfile.writing_handle = ifh if self._inline else dfh
2138 self._segmentfile.writing_handle = ifh if self._inline else dfh
2139 self._segmentfile_sidedata.writing_handle = sdfh
2141 yield
2140 yield
2142 if self._docket is not None:
2141 if self._docket is not None:
2143 self._write_docket(transaction)
2142 self._write_docket(transaction)
2144 finally:
2143 finally:
2145 self._writinghandles = None
2144 self._writinghandles = None
2146 self._segmentfile.writing_handle = None
2145 self._segmentfile.writing_handle = None
2146 self._segmentfile_sidedata.writing_handle = None
2147 if dfh is not None:
2147 if dfh is not None:
2148 dfh.close()
2148 dfh.close()
2149 if sdfh is not None:
2149 if sdfh is not None:
2150 sdfh.close()
2150 sdfh.close()
2151 # closing the index file last to avoid exposing referent to
2151 # closing the index file last to avoid exposing referent to
2152 # potential unflushed data content.
2152 # potential unflushed data content.
2153 if ifh is not None:
2153 if ifh is not None:
2154 ifh.close()
2154 ifh.close()
2155
2155
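The `_writing` context manager above is the single gateway for obtaining write handles: re-entrant callers (such as `_writeentry` and `addgroup` below) see `self._writinghandles` already set and simply yield. A minimal standalone sketch of the same pattern, with hypothetical names, assuming nothing about the real revlog API:

import contextlib

class handleowner(object):
    """Illustrative stand-in for an object with lazily opened write handles."""

    def __init__(self):
        self._writinghandles = None

    @contextlib.contextmanager
    def _writing(self, path):
        if self._writinghandles is not None:
            # re-entrant use: an outer _writing() already opened everything
            yield
        else:
            fh = None
            try:
                fh = open(path, 'ab+')
                self._writinghandles = (fh,)
                yield
            finally:
                self._writinghandles = None
                if fh is not None:
                    fh.close()

owner = handleowner()
with owner._writing('example.dat'):
    with owner._writing('example.dat'):  # inner call reuses the open handle
        owner._writinghandles[0].write(b'payload')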
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2); however, subclasses
            might use a different hashing method (and override checkhash()
            in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )
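When no ``node`` is passed, ``addrevision`` derives it as hash(text, p1, p2). For SHA-1 revlogs this is the classic Mercurial convention: SHA-1 over the two parent nodes in sorted order followed by the revision text. A small sketch of that computation (illustrative only; the real helper lives in mercurial.utils.storageutil):

import hashlib

def hash_revision(text, p1, p2):
    # sort the parents so the hash is independent of their order
    a, b = (p1, p2) if p1 < p2 else (p2, p1)
    s = hashlib.sha1(a)
    s.update(b)
    s.update(text)
    return s.digest()

nullid = b'\0' * 20
node = hash_revision(b'file content\n', nullid, nullid)
assert len(node) == 20  # SHA-1 nodes are 20 bytes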
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
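`compress` returns a ``(header, data)`` pair: an empty header when the payload identifies its own engine (zlib output starts with ``b'x'``), and ``b'u'`` to mark data stored uncompressed, except when the data already starts with ``b'\0'`` and thus cannot be mistaken for a compressed chunk. A hedged sketch of the same convention using plain zlib (the real engine choice is configurable):

import zlib

def compress_chunk(data):
    if not data:
        return b'', data
    compressed = zlib.compress(data)
    if len(compressed) < len(data):
        # the b'x' zlib header travels inside the payload itself
        return b'', compressed
    if data[0:1] == b'\0':
        # b'\0' never begins a compressed stream, so no marker is needed
        return b'', data
    return b'u', data

header, payload = compress_chunk(b'abc' * 100)
assert header == b'' and payload[0:1] == b'x'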
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)
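`decompress` is a dispatcher keyed on that first header byte. A simplified round-trip sketch handling only the three built-in cases (the real method also consults the pluggable compression engines):

import zlib

def decompress_chunk(chunk):
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)  # zlib streams begin with b'x'
    if t == b'\0':
        return chunk  # raw data that happens to start with NUL
    if t == b'u':
        return chunk[1:]  # explicitly marked uncompressed
    raise ValueError('unknown compression header: %r' % t)

assert decompress_chunk(zlib.compress(b'spam' * 50)) == b'spam' * 50
assert decompress_chunk(b'uplain') == b'plain'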
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty; that way
            # we can easily detect empty sidedata, and it will be no different
            # from the sidedata we add manually.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO: cache this in a docket file before getting out of experimental.
        """
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end
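`_addrevision` above records, per index entry, *how* the chunk was compressed: plain (stored as-is), default (compressed with the docket's default engine, whose one-byte header can be dropped and re-derived), or inline (header kept with the chunk). A standalone sketch of that decision; the numeric values are meant to mirror the ``COMP_MODE_*`` constants from ``mercurial/revlogutils/constants.py`` but are otherwise illustrative:

COMP_MODE_PLAIN = 0    # chunk stored as-is, no header
COMP_MODE_DEFAULT = 1  # compressed with the docket's default engine
COMP_MODE_INLINE = 2   # legacy form: header stored with the chunk

def pick_compression_mode(header, data, default_header):
    if not header and not data:
        # nothing to store: trivially plain
        return COMP_MODE_PLAIN
    if not header:
        first = data[0:1]
        if first == b'\0':
            return COMP_MODE_PLAIN  # uncompressed chunk starting with NUL
        if first == default_header:
            return COMP_MODE_DEFAULT  # header re-derivable from the docket
    # unknown engine, or an explicit b'u' marker: keep the header inline
    return COMP_MODE_INLINE

assert pick_compression_mode(b'', b'x\x9c payload', b'x') == COMP_MODE_DEFAULT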
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)
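The seek-before-write dance in `_writeentry` is the workaround described in its opening comment. The defensive pattern in isolation (plain files only, nothing revlog-specific):

import os

# After any read on a handle that is also used for writing, reposition
# explicitly so the next write lands at a known offset on every platform.
with open('demo.bin', 'wb') as fh:
    fh.write(b'0123456789')

fh = open('demo.bin', 'rb+')
fh.read(4)               # the handle has now transitioned to "read" state
fh.seek(0, os.SEEK_END)  # explicit positioning call before writing
fh.write(b'tail')
fh.close()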
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
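Each element of the ``deltas`` iterable that `addgroup` consumes is an 8-tuple. A sketch of building one by hand, using a full-replacement delta against the null revision (the ``(start, end, new_length)`` patch header matches the mdiff binary-delta layout; all nodes here are made up):

import struct

nullid = b'\0' * 20

def make_group_entry(node, p1, p2, linknode, text):
    # full replacement of the (empty) null base: one patch hunk covering
    # bytes [0, 0) and inserting len(text) bytes
    delta = struct.pack(b'>lll', 0, 0, len(text)) + text
    flags = 0      # addgroup() maps 0 to REVIDX_DEFAULT_FLAGS
    sidedata = {}  # no sidedata for this revision
    return (node, p1, p2, linknode, nullid, delta, flags, sidedata)

entry = make_group_entry(b'\x11' * 20, nullid, nullid, b'\x22' * 20, b'data\n')
assert len(entry) == 8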
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However, it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

        del self.index[rev:-1]
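The truncation offsets computed by `strip` differ between the two layouts: with a separate data file, the index is cut at ``rev * entry_size`` and the data file at ``start(rev)``; inline, both streams share the ``.i`` file, so the offsets add. A worked sketch of that arithmetic (values invented):

def truncation_points(rev, entry_size, data_start, inline):
    # data_start plays the role of self.start(rev): the offset of the
    # first data byte belonging to ``rev``
    if not inline:
        return {'index': rev * entry_size, 'data': data_start}
    # inline revlogs interleave index entries and data in one file
    return {'index': data_start + rev * entry_size}

# stripping from rev 10, 64-byte index entries, 5000 bytes of earlier data
assert truncation_points(10, 64, 5000, inline=False) == {
    'index': 640,
    'data': 5000,
}
assert truncation_points(10, 64, 5000, inline=True) == {'index': 5640}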
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
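For a non-inline revlog the `checksize` arithmetic reduces to two subtractions: ``dd`` is the data file size minus the expected end of the last revision, and ``di`` is whatever trails the last complete index entry. A worked example with invented numbers:

entry_size = 64        # self.index.entry_size
expected_data = 12288  # self.end(len(self) - 1)
actual_data = 12288    # observed size of the .d file
actual_index = 6464    # observed size of the .i file

dd = actual_data - expected_data        # 0: data file is healthy
whole = actual_index // entry_size      # 101 complete entries
di = actual_index - whole * entry_size  # 0: no trailing garbage

assert (dd, di) == (0, 0)  # what a healthy revlog reports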
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. If it is None, the destination's current setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
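A hedged sketch of driving `clone` from the caller's side; ``src``, ``dst`` and ``tr`` are assumed to be an existing revlog, an empty destination revlog and an open transaction, none of which are constructed here:

def clone_with_policy(src, dst, tr, recompute_deltas=False):
    # ``src``/``dst`` are assumed to be revlog instances (dst empty) and
    # ``tr`` an open transaction; constructing them is out of scope here.
    if recompute_deltas:
        policy = src.DELTAREUSENEVER     # slowest: rebuild every delta
    else:
        policy = src.DELTAREUSESAMEREVS  # the balanced default

    def progress(revlog, rev, node):
        print('cloned revision %d' % rev)

    src.clone(tr, dst, addrevisioncb=progress, deltareuse=policy)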
2980 def _clone(
2981 def _clone(
2981 self,
2982 self,
2982 tr,
2983 tr,
2983 destrevlog,
2984 destrevlog,
2984 addrevisioncb,
2985 addrevisioncb,
2985 deltareuse,
2986 deltareuse,
2986 forcedeltabothparents,
2987 forcedeltabothparents,
2987 sidedata_helpers,
2988 sidedata_helpers,
2988 ):
2989 ):
2989 """perform the core duty of `revlog.clone` after parameter processing"""
2990 """perform the core duty of `revlog.clone` after parameter processing"""
2990 deltacomputer = deltautil.deltacomputer(destrevlog)
2991 deltacomputer = deltautil.deltacomputer(destrevlog)
2991 index = self.index
2992 index = self.index
2992 for rev in self:
2993 for rev in self:
2993 entry = index[rev]
2994 entry = index[rev]
2994
2995
2995 # Some classes override linkrev to take filtered revs into
2996 # Some classes override linkrev to take filtered revs into
2996 # account. Use raw entry from index.
2997 # account. Use raw entry from index.
2997 flags = entry[0] & 0xFFFF
2998 flags = entry[0] & 0xFFFF
2998 linkrev = entry[4]
2999 linkrev = entry[4]
2999 p1 = index[entry[5]][7]
3000 p1 = index[entry[5]][7]
3000 p2 = index[entry[6]][7]
3001 p2 = index[entry[6]][7]
3001 node = entry[7]
3002 node = entry[7]
3002
3003
3003 # (Possibly) reuse the delta from the revlog if allowed and
3004 # (Possibly) reuse the delta from the revlog if allowed and
3004 # the revlog chunk is a delta.
3005 # the revlog chunk is a delta.
3005 cachedelta = None
3006 cachedelta = None
3006 rawtext = None
3007 rawtext = None
3007 if deltareuse == self.DELTAREUSEFULLADD:
3008 if deltareuse == self.DELTAREUSEFULLADD:
3008 text = self._revisiondata(rev)
3009 text = self._revisiondata(rev)
3009 sidedata = self.sidedata(rev)
3010 sidedata = self.sidedata(rev)
3010
3011
3011 if sidedata_helpers is not None:
3012 if sidedata_helpers is not None:
3012 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3013 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3013 self, sidedata_helpers, sidedata, rev
3014 self, sidedata_helpers, sidedata, rev
3014 )
3015 )
3015 flags = flags | new_flags[0] & ~new_flags[1]
3016 flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
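
    # Illustrative sketch (not from the original file): the copy loop above
    # is typically driven through `clone()`; `src`, `dst` and `tr` below are
    # assumptions made for the example.
    #
    #     def copy_revlog(src, dst, tr):
    #         def progress(store, rev, node):
    #             pass  # e.g. advance a progress bar
    #
    #         src.clone(
    #             tr,
    #             dst,
    #             addrevisioncb=progress,
    #             deltareuse=src.DELTAREUSESAMEREVS,
    #         )
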
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
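
    # Illustrative sketch (not from the original file): only REVLOGV1
    # revlogs can be censored here, so a caller looks roughly like this;
    # `repo` and the tracked path are assumptions made for the example.
    #
    #     with repo.transaction(b'censor') as tr:
    #         fl = repo.file(b'path/to/secret')
    #         fl.censorrevision(tr, bad_node, tombstone=b'removed by admin')
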
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta:   file content starts with b'\1\n', the metadata
            #           header defined in filelog.py, but without a rename
            #   ext:    content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text" mentioned below
            # is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).
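
            # Worked example (illustrative, not from the original file):
            # for a renamed file stored as
            #   rawtext = b'\1\ncopy: a\ncopyrev: <40 hex digits>\n\1\nhello\n'
            # the metadata envelope is 2 + 58 + 2 == 62 bytes, so
            # L1 == L2 == 68 while len(read()) == size() == 68 - 62 == 6,
            # i.e. L2 - LM with LM == 62.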

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
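
    # Illustrative sketch (not from the original file): a verifier consumes
    # the generator above roughly like this; `rl` and `ui` are assumptions
    # made for the example.
    #
    #     state = {
    #         b'expectedversion': rl._format_version,
    #         b'erroroncensored': True,
    #     }
    #     for problem in rl.verifyintegrity(state):
    #         if problem.error:
    #             ui.warn(problem.error + b'\n')
    #         elif problem.warning:
    #             ui.warn(problem.warning + b'\n')
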
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
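
    # Illustrative sketch (not from the original file): the returned dict
    # only contains the requested keys, e.g. (values are made up):
    #
    #     info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #     # -> {b'revisionscount': 42, b'trackedsize': 123456}
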
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
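
                # nb on the mode chosen above (assumed semantics, for
                # illustration): COMP_MODE_PLAIN stores the chunk
                # uncompressed, COMP_MODE_DEFAULT defers to the docket's
                # default compression engine, and COMP_MODE_INLINE keeps a
                # per-chunk compression header inside the data itself.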
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
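
    # Illustrative sketch (not from the original file): sidedata rewriting
    # is driven over a contiguous revision range, e.g. by upgrade or
    # exchange code; `repo` and `helpers` are assumptions made for the
    # example.
    #
    #     with repo.transaction(b'rewrite-sidedata') as tr:
    #         cl = repo.unfiltered().changelog
    #         cl.rewrite_sidedata(tr, helpers, 0, len(cl) - 1)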