revlog: Extract low-level random-access file read caching logic...
Simon Sapin
r48218:e0a314bc default
@@ -0,0 +1,138 @@
1 # Copyright Mercurial Contributors
2 #
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
5
6 import contextlib
7
8 from ..i18n import _
9 from .. import (
10 error,
11 util,
12 )
13
14
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16
17 PARTIAL_READ_MSG = _(
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 )
20
21
22 def _is_power_of_two(n):
23 return (n & (n - 1) == 0) and n != 0
24
25
26 class randomaccessfile(object):
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
28
29 def __init__(
30 self,
31 opener,
32 filename,
33 default_cached_chunk_size,
34 initial_cache=None,
35 ):
36 # Required by bitwise manipulation below
37 assert _is_power_of_two(default_cached_chunk_size)
38
39 self.opener = opener
40 self.filename = filename
41 self.default_cached_chunk_size = default_cached_chunk_size
42 self.writing_handle = None # This is set from revlog.py
43 self._cached_chunk = b''
44 self._cached_chunk_position = 0 # Offset from the start of the file
45 if initial_cache:
46 self._cached_chunk_position, self._cached_chunk = initial_cache
47
48 def clear_cache(self):
49 self._cached_chunk = b''
50 self._cached_chunk_position = 0
51
52 def _open(self, mode=b'r'):
53 """Return a file object"""
54 return self.opener(self.filename, mode=mode)
55
56 @contextlib.contextmanager
57 def _open_read(self, existing_file_obj=None):
58 """File object suitable for reading data"""
59 # Use explicit file handle, if given.
60 if existing_file_obj is not None:
61 yield existing_file_obj
62
63 # Use a file handle being actively used for writes, if available.
64 # There is some danger to doing this because reads will seek the
65 # file. However, revlog._writeentry performs a SEEK_END before all
66 # writes, so we should be safe.
67 elif self.writing_handle:
68 yield self.writing_handle
69
70 # Otherwise open a new file handle.
71 else:
72 with self._open() as fp:
73 yield fp
74
75 def read_chunk(self, offset, length, existing_file_obj=None):
76 """Read a chunk of bytes from the file.
77
78 Accepts an absolute offset, length to read, and an optional existing
79 file handle to read from.
80
81 If an existing file handle is passed, it will be seeked and the
82 original seek position will NOT be restored.
83
84 Returns a str or buffer of raw byte data.
85
86 Raises if the requested number of bytes could not be read.
87 """
88 end = offset + length
89 cache_start = self._cached_chunk_position
90 cache_end = cache_start + len(self._cached_chunk)
91 # Is the requested chunk within the cache?
92 if cache_start <= offset and end <= cache_end:
93 if cache_start == offset and end == cache_end:
94 return self._cached_chunk # avoid a copy
95 relative_start = offset - cache_start
96 return util.buffer(self._cached_chunk, relative_start, length)
97
98 return self._read_and_update_cache(offset, length, existing_file_obj)
99
100 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
101 # Cache data both forward and backward around the requested
102 # data, in a fixed size window. This helps speed up operations
103 # involving reading the revlog backwards.
104 real_offset = offset & ~(self.default_cached_chunk_size - 1)
105 real_length = (
106 (offset + length + self.default_cached_chunk_size)
107 & ~(self.default_cached_chunk_size - 1)
108 ) - real_offset
109 with self._open_read(existing_file_obj) as file_obj:
110 file_obj.seek(real_offset)
111 data = file_obj.read(real_length)
112
113 self._add_cached_chunk(real_offset, data)
114
115 relative_offset = offset - real_offset
116 got = len(data) - relative_offset
117 if got < length:
118 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
119 raise error.RevlogError(message)
120
121 if offset != real_offset or real_length != length:
122 return util.buffer(data, relative_offset, length)
123 return data
124
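The masking in _read_and_update_cache rounds the read window to multiples of default_cached_chunk_size, which is why the constructor asserts it is a power of two. A minimal worked sketch of the arithmetic, assuming a hypothetical 64 KiB chunk size:

    chunk_size = 65536                        # must be a power of two
    offset, length = 70000, 100               # the caller's request
    real_offset = offset & ~(chunk_size - 1)  # 65536: aligned start
    real_end = (offset + length + chunk_size) & ~(chunk_size - 1)
    real_length = real_end - real_offset      # 65536: aligned window size
    # the aligned window always covers the requested span
    assert real_offset <= offset and offset + length <= real_end

So the 100 requested bytes at offset 70000 are served from one aligned 64 KiB window covering [65536, 131072).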
125 def _add_cached_chunk(self, offset, data):
126 """Add to or replace the cached data chunk.
127
128 Accepts an absolute offset and the data that is at that location.
129 """
130 if (
131 self._cached_chunk_position + len(self._cached_chunk) == offset
132 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
133 ):
134 # add to existing cache
135 self._cached_chunk += data
136 else:
137 self._cached_chunk = data
138 self._cached_chunk_position = offset
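For context, a hedged usage sketch of the new class (module path as introduced by this changeset; the opener below is a hypothetical stand-in for the vfs callable that real callers pass in):

    import io
    from mercurial.revlogutils.randomaccessfile import randomaccessfile

    def fake_opener(filename, mode=b'r'):
        # stand-in vfs: every open returns the same 200,000 dummy bytes
        return io.BytesIO(b'x' * 200000)

    raf = randomaccessfile(fake_opener, b'data/some.d', 65536)
    first = raf.read_chunk(70000, 100)   # opens, reads one aligned window, caches it
    second = raf.read_chunk(70010, 50)   # served from the cached window, no file access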
@@ -1,627 +1,630 @@
1 # changelog.py - changelog class for mercurial
1 # changelog.py - changelog class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 )
14 )
15 from .thirdparty import attr
15 from .thirdparty import attr
16
16
17 from . import (
17 from . import (
18 encoding,
18 encoding,
19 error,
19 error,
20 metadata,
20 metadata,
21 pycompat,
21 pycompat,
22 revlog,
22 revlog,
23 )
23 )
24 from .utils import (
24 from .utils import (
25 dateutil,
25 dateutil,
26 stringutil,
26 stringutil,
27 )
27 )
28 from .revlogutils import (
28 from .revlogutils import (
29 constants as revlog_constants,
29 constants as revlog_constants,
30 flagutil,
30 flagutil,
31 )
31 )
32
32
33 _defaultextra = {b'branch': b'default'}
33 _defaultextra = {b'branch': b'default'}
34
34
35
35
36 def _string_escape(text):
36 def _string_escape(text):
37 """
37 """
38 >>> from .pycompat import bytechr as chr
38 >>> from .pycompat import bytechr as chr
39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
39 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
40 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
41 >>> s
41 >>> s
42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
42 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
43 >>> res = _string_escape(s)
43 >>> res = _string_escape(s)
44 >>> s == _string_unescape(res)
44 >>> s == _string_unescape(res)
45 True
45 True
46 """
46 """
47 # subset of the string_escape codec
47 # subset of the string_escape codec
48 text = (
48 text = (
49 text.replace(b'\\', b'\\\\')
49 text.replace(b'\\', b'\\\\')
50 .replace(b'\n', b'\\n')
50 .replace(b'\n', b'\\n')
51 .replace(b'\r', b'\\r')
51 .replace(b'\r', b'\\r')
52 )
52 )
53 return text.replace(b'\0', b'\\0')
53 return text.replace(b'\0', b'\\0')
54
54
55
55
56 def _string_unescape(text):
56 def _string_unescape(text):
57 if b'\\0' in text:
57 if b'\\0' in text:
58 # fix up \0 without getting into trouble with \\0
58 # fix up \0 without getting into trouble with \\0
59 text = text.replace(b'\\\\', b'\\\\\n')
59 text = text.replace(b'\\\\', b'\\\\\n')
60 text = text.replace(b'\\0', b'\0')
60 text = text.replace(b'\\0', b'\0')
61 text = text.replace(b'\n', b'')
61 text = text.replace(b'\n', b'')
62 return stringutil.unescapestr(text)
62 return stringutil.unescapestr(text)
63
63
64
64
65 def decodeextra(text):
65 def decodeextra(text):
66 """
66 """
67 >>> from .pycompat import bytechr as chr
67 >>> from .pycompat import bytechr as chr
68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
68 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
69 ... ).items())
69 ... ).items())
70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
70 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
71 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
72 ... b'baz': chr(92) + chr(0) + b'2'})
72 ... b'baz': chr(92) + chr(0) + b'2'})
73 ... ).items())
73 ... ).items())
74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
74 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
75 """
75 """
76 extra = _defaultextra.copy()
76 extra = _defaultextra.copy()
77 for l in text.split(b'\0'):
77 for l in text.split(b'\0'):
78 if l:
78 if l:
79 k, v = _string_unescape(l).split(b':', 1)
79 k, v = _string_unescape(l).split(b':', 1)
80 extra[k] = v
80 extra[k] = v
81 return extra
81 return extra
82
82
83
83
84 def encodeextra(d):
84 def encodeextra(d):
85 # keys must be sorted to produce a deterministic changelog entry
85 # keys must be sorted to produce a deterministic changelog entry
86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
86 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
87 return b"\0".join(items)
87 return b"\0".join(items)
88
88
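A hedged doctest-style illustration of the output (one escaped key:value pair per entry, NUL-separated, keys sorted):

    >>> encodeextra({b'close': b'1', b'branch': b'stable'})
    'branch:stable\x00close:1'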
89
89
90 def stripdesc(desc):
90 def stripdesc(desc):
91 """strip trailing whitespace and leading and trailing empty lines"""
91 """strip trailing whitespace and leading and trailing empty lines"""
92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
92 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
93
93
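A hedged doctest-style illustration:

    >>> stripdesc(b'\nfirst line  \nsecond line\t\n\n')
    'first line\nsecond line'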
94
94
95 class appender(object):
95 class appender(object):
96 """the changelog index must be updated last on disk, so we use this class
96 """the changelog index must be updated last on disk, so we use this class
97 to delay writes to it"""
97 to delay writes to it"""
98
98
99 def __init__(self, vfs, name, mode, buf):
99 def __init__(self, vfs, name, mode, buf):
100 self.data = buf
100 self.data = buf
101 fp = vfs(name, mode)
101 fp = vfs(name, mode)
102 self.fp = fp
102 self.fp = fp
103 self.offset = fp.tell()
103 self.offset = fp.tell()
104 self.size = vfs.fstat(fp).st_size
104 self.size = vfs.fstat(fp).st_size
105 self._end = self.size
105 self._end = self.size
106
106
107 def end(self):
107 def end(self):
108 return self._end
108 return self._end
109
109
110 def tell(self):
110 def tell(self):
111 return self.offset
111 return self.offset
112
112
113 def flush(self):
113 def flush(self):
114 pass
114 pass
115
115
116 @property
116 @property
117 def closed(self):
117 def closed(self):
118 return self.fp.closed
118 return self.fp.closed
119
119
120 def close(self):
120 def close(self):
121 self.fp.close()
121 self.fp.close()
122
122
123 def seek(self, offset, whence=0):
123 def seek(self, offset, whence=0):
124 '''virtual file offset spans real file and data'''
124 '''virtual file offset spans real file and data'''
125 if whence == 0:
125 if whence == 0:
126 self.offset = offset
126 self.offset = offset
127 elif whence == 1:
127 elif whence == 1:
128 self.offset += offset
128 self.offset += offset
129 elif whence == 2:
129 elif whence == 2:
130 self.offset = self.end() + offset
130 self.offset = self.end() + offset
131 if self.offset < self.size:
131 if self.offset < self.size:
132 self.fp.seek(self.offset)
132 self.fp.seek(self.offset)
133
133
134 def read(self, count=-1):
134 def read(self, count=-1):
135 '''only trick here is reads that span real file and data'''
135 '''only trick here is reads that span real file and data'''
136 ret = b""
136 ret = b""
137 if self.offset < self.size:
137 if self.offset < self.size:
138 s = self.fp.read(count)
138 s = self.fp.read(count)
139 ret = s
139 ret = s
140 self.offset += len(s)
140 self.offset += len(s)
141 if count > 0:
141 if count > 0:
142 count -= len(s)
142 count -= len(s)
143 if count != 0:
143 if count != 0:
144 doff = self.offset - self.size
144 doff = self.offset - self.size
145 self.data.insert(0, b"".join(self.data))
145 self.data.insert(0, b"".join(self.data))
146 del self.data[1:]
146 del self.data[1:]
147 s = self.data[0][doff : doff + count]
147 s = self.data[0][doff : doff + count]
148 self.offset += len(s)
148 self.offset += len(s)
149 ret += s
149 ret += s
150 return ret
150 return ret
151
151
152 def write(self, s):
152 def write(self, s):
153 self.data.append(bytes(s))
153 self.data.append(bytes(s))
154 self.offset += len(s)
154 self.offset += len(s)
155 self._end += len(s)
155 self._end += len(s)
156
156
157 def __enter__(self):
157 def __enter__(self):
158 self.fp.__enter__()
158 self.fp.__enter__()
159 return self
159 return self
160
160
161 def __exit__(self, *args):
161 def __exit__(self, *args):
162 return self.fp.__exit__(*args)
162 return self.fp.__exit__(*args)
163
163
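To make the virtual-offset behaviour concrete, a hedged, self-contained sketch (the fake vfs and filename are made up; only the two vfs entry points appender touches are implemented):

    import io

    class fakestat(object):
        def __init__(self, size):
            self.st_size = size

    class fakevfs(object):
        def __init__(self, payload):
            self.payload = payload
        def __call__(self, name, mode):
            # every open yields the current on-disk bytes
            return io.BytesIO(self.payload)
        def fstat(self, fp):
            return fakestat(len(self.payload))

    buf = []
    fp = appender(fakevfs(b'0123456789'), b'00changelog.i', b'r', buf)
    fp.write(b'ABC')               # buffered in buf, never written to "disk"
    fp.seek(8)
    assert fp.read(5) == b'89ABC'  # read spans on-disk and buffered bytes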
164
164
165 class _divertopener(object):
165 class _divertopener(object):
166 def __init__(self, opener, target):
166 def __init__(self, opener, target):
167 self._opener = opener
167 self._opener = opener
168 self._target = target
168 self._target = target
169
169
170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
170 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
171 if name != self._target:
171 if name != self._target:
172 return self._opener(name, mode, **kwargs)
172 return self._opener(name, mode, **kwargs)
173 return self._opener(name + b".a", mode, **kwargs)
173 return self._opener(name + b".a", mode, **kwargs)
174
174
175 def __getattr__(self, attr):
175 def __getattr__(self, attr):
176 return getattr(self._opener, attr)
176 return getattr(self._opener, attr)
177
177
178
178
179 def _delayopener(opener, target, buf):
179 def _delayopener(opener, target, buf):
180 """build an opener that stores chunks in 'buf' instead of 'target'"""
180 """build an opener that stores chunks in 'buf' instead of 'target'"""
181
181
182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
182 def _delay(name, mode=b'r', checkambig=False, **kwargs):
183 if name != target:
183 if name != target:
184 return opener(name, mode, **kwargs)
184 return opener(name, mode, **kwargs)
185 assert not kwargs
185 assert not kwargs
186 return appender(opener, name, mode, buf)
186 return appender(opener, name, mode, buf)
187
187
188 return _delay
188 return _delay
189
189
190
190
191 @attr.s
191 @attr.s
192 class _changelogrevision(object):
192 class _changelogrevision(object):
193 # Extensions might modify _defaultextra, so let the constructor below pass
193 # Extensions might modify _defaultextra, so let the constructor below pass
194 # it in
194 # it in
195 extra = attr.ib()
195 extra = attr.ib()
196 manifest = attr.ib()
196 manifest = attr.ib()
197 user = attr.ib(default=b'')
197 user = attr.ib(default=b'')
198 date = attr.ib(default=(0, 0))
198 date = attr.ib(default=(0, 0))
199 files = attr.ib(default=attr.Factory(list))
199 files = attr.ib(default=attr.Factory(list))
200 filesadded = attr.ib(default=None)
200 filesadded = attr.ib(default=None)
201 filesremoved = attr.ib(default=None)
201 filesremoved = attr.ib(default=None)
202 p1copies = attr.ib(default=None)
202 p1copies = attr.ib(default=None)
203 p2copies = attr.ib(default=None)
203 p2copies = attr.ib(default=None)
204 description = attr.ib(default=b'')
204 description = attr.ib(default=b'')
205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
205 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
206
206
207
207
208 class changelogrevision(object):
208 class changelogrevision(object):
209 """Holds results of a parsed changelog revision.
209 """Holds results of a parsed changelog revision.
210
210
211 Changelog revisions consist of multiple pieces of data, including
211 Changelog revisions consist of multiple pieces of data, including
212 the manifest node, user, and date. This object exposes a view into
212 the manifest node, user, and date. This object exposes a view into
213 the parsed object.
213 the parsed object.
214 """
214 """
215
215
216 __slots__ = (
216 __slots__ = (
217 '_offsets',
217 '_offsets',
218 '_text',
218 '_text',
219 '_sidedata',
219 '_sidedata',
220 '_cpsd',
220 '_cpsd',
221 '_changes',
221 '_changes',
222 )
222 )
223
223
224 def __new__(cls, cl, text, sidedata, cpsd):
224 def __new__(cls, cl, text, sidedata, cpsd):
225 if not text:
225 if not text:
226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
226 return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
227
227
228 self = super(changelogrevision, cls).__new__(cls)
228 self = super(changelogrevision, cls).__new__(cls)
229 # We could return here and implement the following as an __init__.
229 # We could return here and implement the following as an __init__.
230 # But doing it here is equivalent and saves an extra function call.
230 # But doing it here is equivalent and saves an extra function call.
231
231
232 # format used:
232 # format used:
233 # nodeid\n : manifest node in ascii
233 # nodeid\n : manifest node in ascii
234 # user\n : user, no \n or \r allowed
234 # user\n : user, no \n or \r allowed
235 # time tz extra\n : date (time is int or float, timezone is int)
235 # time tz extra\n : date (time is int or float, timezone is int)
236 # : extra is metadata, encoded and separated by '\0'
236 # : extra is metadata, encoded and separated by '\0'
237 # : older versions ignore it
237 # : older versions ignore it
238 # files\n\n : files modified by the cset, no \n or \r allowed
238 # files\n\n : files modified by the cset, no \n or \r allowed
239 # (.*) : comment (free text, ideally utf-8)
239 # (.*) : comment (free text, ideally utf-8)
240 #
240 #
241 # changelog v0 doesn't use extra
241 # changelog v0 doesn't use extra
242
242
243 nl1 = text.index(b'\n')
243 nl1 = text.index(b'\n')
244 nl2 = text.index(b'\n', nl1 + 1)
244 nl2 = text.index(b'\n', nl1 + 1)
245 nl3 = text.index(b'\n', nl2 + 1)
245 nl3 = text.index(b'\n', nl2 + 1)
246
246
247 # The list of files may be empty; in that case nl3 is the first newline
247 # The list of files may be empty; in that case nl3 is the first newline
248 # of the double newline that precedes the description.
248 # of the double newline that precedes the description.
249 if text[nl3 + 1 : nl3 + 2] == b'\n':
249 if text[nl3 + 1 : nl3 + 2] == b'\n':
250 doublenl = nl3
250 doublenl = nl3
251 else:
251 else:
252 doublenl = text.index(b'\n\n', nl3 + 1)
252 doublenl = text.index(b'\n\n', nl3 + 1)
253
253
254 self._offsets = (nl1, nl2, nl3, doublenl)
254 self._offsets = (nl1, nl2, nl3, doublenl)
255 self._text = text
255 self._text = text
256 self._sidedata = sidedata
256 self._sidedata = sidedata
257 self._cpsd = cpsd
257 self._cpsd = cpsd
258 self._changes = None
258 self._changes = None
259
259
260 return self
260 return self
261
261
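To make the parsed layout concrete, a hedged illustrative raw entry (all values made up):

    0123456789abcdef0123456789abcdef01234567
    Jane Doe <jane@example.com>
    1700000000 0 branch:stable
    dir/a.txt
    dir/b.txt

    Fix a bug in the frobnicator.

nl1, nl2 and nl3 mark the ends of the manifest, user and date/extra lines; doublenl marks the blank line separating the (possibly empty) file list from the free-form description.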
262 @property
262 @property
263 def manifest(self):
263 def manifest(self):
264 return bin(self._text[0 : self._offsets[0]])
264 return bin(self._text[0 : self._offsets[0]])
265
265
266 @property
266 @property
267 def user(self):
267 def user(self):
268 off = self._offsets
268 off = self._offsets
269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
269 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
270
270
271 @property
271 @property
272 def _rawdate(self):
272 def _rawdate(self):
273 off = self._offsets
273 off = self._offsets
274 dateextra = self._text[off[1] + 1 : off[2]]
274 dateextra = self._text[off[1] + 1 : off[2]]
275 return dateextra.split(b' ', 2)[0:2]
275 return dateextra.split(b' ', 2)[0:2]
276
276
277 @property
277 @property
278 def _rawextra(self):
278 def _rawextra(self):
279 off = self._offsets
279 off = self._offsets
280 dateextra = self._text[off[1] + 1 : off[2]]
280 dateextra = self._text[off[1] + 1 : off[2]]
281 fields = dateextra.split(b' ', 2)
281 fields = dateextra.split(b' ', 2)
282 if len(fields) != 3:
282 if len(fields) != 3:
283 return None
283 return None
284
284
285 return fields[2]
285 return fields[2]
286
286
287 @property
287 @property
288 def date(self):
288 def date(self):
289 raw = self._rawdate
289 raw = self._rawdate
290 time = float(raw[0])
290 time = float(raw[0])
291 # Various tools did silly things with the timezone.
291 # Various tools did silly things with the timezone.
292 try:
292 try:
293 timezone = int(raw[1])
293 timezone = int(raw[1])
294 except ValueError:
294 except ValueError:
295 timezone = 0
295 timezone = 0
296
296
297 return time, timezone
297 return time, timezone
298
298
299 @property
299 @property
300 def extra(self):
300 def extra(self):
301 raw = self._rawextra
301 raw = self._rawextra
302 if raw is None:
302 if raw is None:
303 return _defaultextra
303 return _defaultextra
304
304
305 return decodeextra(raw)
305 return decodeextra(raw)
306
306
307 @property
307 @property
308 def changes(self):
308 def changes(self):
309 if self._changes is not None:
309 if self._changes is not None:
310 return self._changes
310 return self._changes
311 if self._cpsd:
311 if self._cpsd:
312 changes = metadata.decode_files_sidedata(self._sidedata)
312 changes = metadata.decode_files_sidedata(self._sidedata)
313 else:
313 else:
314 changes = metadata.ChangingFiles(
314 changes = metadata.ChangingFiles(
315 touched=self.files or (),
315 touched=self.files or (),
316 added=self.filesadded or (),
316 added=self.filesadded or (),
317 removed=self.filesremoved or (),
317 removed=self.filesremoved or (),
318 p1_copies=self.p1copies or {},
318 p1_copies=self.p1copies or {},
319 p2_copies=self.p2copies or {},
319 p2_copies=self.p2copies or {},
320 )
320 )
321 self._changes = changes
321 self._changes = changes
322 return changes
322 return changes
323
323
324 @property
324 @property
325 def files(self):
325 def files(self):
326 if self._cpsd:
326 if self._cpsd:
327 return sorted(self.changes.touched)
327 return sorted(self.changes.touched)
328 off = self._offsets
328 off = self._offsets
329 if off[2] == off[3]:
329 if off[2] == off[3]:
330 return []
330 return []
331
331
332 return self._text[off[2] + 1 : off[3]].split(b'\n')
332 return self._text[off[2] + 1 : off[3]].split(b'\n')
333
333
334 @property
334 @property
335 def filesadded(self):
335 def filesadded(self):
336 if self._cpsd:
336 if self._cpsd:
337 return self.changes.added
337 return self.changes.added
338 else:
338 else:
339 rawindices = self.extra.get(b'filesadded')
339 rawindices = self.extra.get(b'filesadded')
340 if rawindices is None:
340 if rawindices is None:
341 return None
341 return None
342 return metadata.decodefileindices(self.files, rawindices)
342 return metadata.decodefileindices(self.files, rawindices)
343
343
344 @property
344 @property
345 def filesremoved(self):
345 def filesremoved(self):
346 if self._cpsd:
346 if self._cpsd:
347 return self.changes.removed
347 return self.changes.removed
348 else:
348 else:
349 rawindices = self.extra.get(b'filesremoved')
349 rawindices = self.extra.get(b'filesremoved')
350 if rawindices is None:
350 if rawindices is None:
351 return None
351 return None
352 return metadata.decodefileindices(self.files, rawindices)
352 return metadata.decodefileindices(self.files, rawindices)
353
353
354 @property
354 @property
355 def p1copies(self):
355 def p1copies(self):
356 if self._cpsd:
356 if self._cpsd:
357 return self.changes.copied_from_p1
357 return self.changes.copied_from_p1
358 else:
358 else:
359 rawcopies = self.extra.get(b'p1copies')
359 rawcopies = self.extra.get(b'p1copies')
360 if rawcopies is None:
360 if rawcopies is None:
361 return None
361 return None
362 return metadata.decodecopies(self.files, rawcopies)
362 return metadata.decodecopies(self.files, rawcopies)
363
363
364 @property
364 @property
365 def p2copies(self):
365 def p2copies(self):
366 if self._cpsd:
366 if self._cpsd:
367 return self.changes.copied_from_p2
367 return self.changes.copied_from_p2
368 else:
368 else:
369 rawcopies = self.extra.get(b'p2copies')
369 rawcopies = self.extra.get(b'p2copies')
370 if rawcopies is None:
370 if rawcopies is None:
371 return None
371 return None
372 return metadata.decodecopies(self.files, rawcopies)
372 return metadata.decodecopies(self.files, rawcopies)
373
373
374 @property
374 @property
375 def description(self):
375 def description(self):
376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
376 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
377
377
378 @property
378 @property
379 def branchinfo(self):
379 def branchinfo(self):
380 extra = self.extra
380 extra = self.extra
381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
381 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
382
382
383
383
384 class changelog(revlog.revlog):
384 class changelog(revlog.revlog):
385 def __init__(self, opener, trypending=False, concurrencychecker=None):
385 def __init__(self, opener, trypending=False, concurrencychecker=None):
386 """Load a changelog revlog using an opener.
386 """Load a changelog revlog using an opener.
387
387
388 If ``trypending`` is true, we attempt to load the index from a
388 If ``trypending`` is true, we attempt to load the index from a
389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
389 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
390 The ``00changelog.i.a`` file contains index (and possibly inline
390 The ``00changelog.i.a`` file contains index (and possibly inline
391 revision) data for a transaction that hasn't been finalized yet.
391 revision) data for a transaction that hasn't been finalized yet.
392 It exists in a separate file to facilitate readers (such as
392 It exists in a separate file to facilitate readers (such as
393 hook processes) accessing data before a transaction is finalized.
393 hook processes) accessing data before a transaction is finalized.
394
394
395 ``concurrencychecker`` will be passed to the revlog init function, see
395 ``concurrencychecker`` will be passed to the revlog init function, see
396 the documentation there.
396 the documentation there.
397 """
397 """
398 revlog.revlog.__init__(
398 revlog.revlog.__init__(
399 self,
399 self,
400 opener,
400 opener,
401 target=(revlog_constants.KIND_CHANGELOG, None),
401 target=(revlog_constants.KIND_CHANGELOG, None),
402 radix=b'00changelog',
402 radix=b'00changelog',
403 checkambig=True,
403 checkambig=True,
404 mmaplargeindex=True,
404 mmaplargeindex=True,
405 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
405 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
406 concurrencychecker=concurrencychecker,
406 concurrencychecker=concurrencychecker,
407 trypending=trypending,
407 trypending=trypending,
408 )
408 )
409
409
410 if self._initempty and (self._format_version == revlog.REVLOGV1):
410 if self._initempty and (self._format_version == revlog.REVLOGV1):
411 # changelogs don't benefit from generaldelta.
411 # changelogs don't benefit from generaldelta.
412
412
413 self._format_flags &= ~revlog.FLAG_GENERALDELTA
413 self._format_flags &= ~revlog.FLAG_GENERALDELTA
414 self._generaldelta = False
414 self._generaldelta = False
415
415
416 # Delta chains for changelogs tend to be very small because entries
416 # Delta chains for changelogs tend to be very small because entries
417 # tend to be small and don't delta well with each other. So disable delta
417 # tend to be small and don't delta well with each other. So disable delta
418 # chains.
418 # chains.
419 self._storedeltachains = False
419 self._storedeltachains = False
420
420
421 self._realopener = opener
421 self._realopener = opener
422 self._delayed = False
422 self._delayed = False
423 self._delaybuf = None
423 self._delaybuf = None
424 self._divert = False
424 self._divert = False
425 self._filteredrevs = frozenset()
425 self._filteredrevs = frozenset()
426 self._filteredrevs_hashcache = {}
426 self._filteredrevs_hashcache = {}
427 self._copiesstorage = opener.options.get(b'copies-storage')
427 self._copiesstorage = opener.options.get(b'copies-storage')
428
428
429 @property
429 @property
430 def filteredrevs(self):
430 def filteredrevs(self):
431 return self._filteredrevs
431 return self._filteredrevs
432
432
433 @filteredrevs.setter
433 @filteredrevs.setter
434 def filteredrevs(self, val):
434 def filteredrevs(self, val):
435 # Ensure all updates go through this function
435 # Ensure all updates go through this function
436 assert isinstance(val, frozenset)
436 assert isinstance(val, frozenset)
437 self._filteredrevs = val
437 self._filteredrevs = val
438 self._filteredrevs_hashcache = {}
438 self._filteredrevs_hashcache = {}
439
439
440 def _write_docket(self, tr):
440 def _write_docket(self, tr):
441 if not self._delayed:
441 if not self._delayed:
442 super(changelog, self)._write_docket(tr)
442 super(changelog, self)._write_docket(tr)
443
443
444 def delayupdate(self, tr):
444 def delayupdate(self, tr):
445 """delay visibility of index updates to other readers"""
445 """delay visibility of index updates to other readers"""
446 if self._docket is None and not self._delayed:
446 if self._docket is None and not self._delayed:
447 if len(self) == 0:
447 if len(self) == 0:
448 self._divert = True
448 self._divert = True
449 if self._realopener.exists(self._indexfile + b'.a'):
449 if self._realopener.exists(self._indexfile + b'.a'):
450 self._realopener.unlink(self._indexfile + b'.a')
450 self._realopener.unlink(self._indexfile + b'.a')
451 self.opener = _divertopener(self._realopener, self._indexfile)
451 self.opener = _divertopener(self._realopener, self._indexfile)
452 else:
452 else:
453 self._delaybuf = []
453 self._delaybuf = []
454 self.opener = _delayopener(
454 self.opener = _delayopener(
455 self._realopener, self._indexfile, self._delaybuf
455 self._realopener, self._indexfile, self._delaybuf
456 )
456 )
457 self._segmentfile.opener = self.opener
457 self._delayed = True
458 self._delayed = True
458 tr.addpending(b'cl-%i' % id(self), self._writepending)
459 tr.addpending(b'cl-%i' % id(self), self._writepending)
459 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
460 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
460
461
461 def _finalize(self, tr):
462 def _finalize(self, tr):
462 """finalize index updates"""
463 """finalize index updates"""
463 self._delayed = False
464 self._delayed = False
464 self.opener = self._realopener
465 self.opener = self._realopener
466 self._segmentfile.opener = self.opener
465 # move redirected index data back into place
467 # move redirected index data back into place
466 if self._docket is not None:
468 if self._docket is not None:
467 self._write_docket(tr)
469 self._write_docket(tr)
468 elif self._divert:
470 elif self._divert:
469 assert not self._delaybuf
471 assert not self._delaybuf
470 tmpname = self._indexfile + b".a"
472 tmpname = self._indexfile + b".a"
471 nfile = self.opener.open(tmpname)
473 nfile = self.opener.open(tmpname)
472 nfile.close()
474 nfile.close()
473 self.opener.rename(tmpname, self._indexfile, checkambig=True)
475 self.opener.rename(tmpname, self._indexfile, checkambig=True)
474 elif self._delaybuf:
476 elif self._delaybuf:
475 fp = self.opener(self._indexfile, b'a', checkambig=True)
477 fp = self.opener(self._indexfile, b'a', checkambig=True)
476 fp.write(b"".join(self._delaybuf))
478 fp.write(b"".join(self._delaybuf))
477 fp.close()
479 fp.close()
478 self._delaybuf = None
480 self._delaybuf = None
479 self._divert = False
481 self._divert = False
480 # split when we're done
482 # split when we're done
481 self._enforceinlinesize(tr)
483 self._enforceinlinesize(tr)
482
484
483 def _writepending(self, tr):
485 def _writepending(self, tr):
484 """create a file containing the unfinalized state for
486 """create a file containing the unfinalized state for
485 pretxnchangegroup"""
487 pretxnchangegroup"""
486 if self._docket:
488 if self._docket:
487 return self._docket.write(tr, pending=True)
489 return self._docket.write(tr, pending=True)
488 if self._delaybuf:
490 if self._delaybuf:
489 # make a temporary copy of the index
491 # make a temporary copy of the index
490 fp1 = self._realopener(self._indexfile)
492 fp1 = self._realopener(self._indexfile)
491 pendingfilename = self._indexfile + b".a"
493 pendingfilename = self._indexfile + b".a"
492 # register as a temp file to ensure cleanup on failure
494 # register as a temp file to ensure cleanup on failure
493 tr.registertmp(pendingfilename)
495 tr.registertmp(pendingfilename)
494 # write existing data
496 # write existing data
495 fp2 = self._realopener(pendingfilename, b"w")
497 fp2 = self._realopener(pendingfilename, b"w")
496 fp2.write(fp1.read())
498 fp2.write(fp1.read())
497 # add pending data
499 # add pending data
498 fp2.write(b"".join(self._delaybuf))
500 fp2.write(b"".join(self._delaybuf))
499 fp2.close()
501 fp2.close()
500 # switch modes so finalize can simply rename
502 # switch modes so finalize can simply rename
501 self._delaybuf = None
503 self._delaybuf = None
502 self._divert = True
504 self._divert = True
503 self.opener = _divertopener(self._realopener, self._indexfile)
505 self.opener = _divertopener(self._realopener, self._indexfile)
506 self._segmentfile.opener = self.opener
504
507
505 if self._divert:
508 if self._divert:
506 return True
509 return True
507
510
508 return False
511 return False
509
512
510 def _enforceinlinesize(self, tr):
513 def _enforceinlinesize(self, tr):
511 if not self._delayed:
514 if not self._delayed:
512 revlog.revlog._enforceinlinesize(self, tr)
515 revlog.revlog._enforceinlinesize(self, tr)
513
516
514 def read(self, nodeorrev):
517 def read(self, nodeorrev):
515 """Obtain data from a parsed changelog revision.
518 """Obtain data from a parsed changelog revision.
516
519
517 Returns a 6-tuple of:
520 Returns a 6-tuple of:
518
521
519 - manifest node in binary
522 - manifest node in binary
520 - author/user as a localstr
523 - author/user as a localstr
521 - date as a 2-tuple of (time, timezone)
524 - date as a 2-tuple of (time, timezone)
522 - list of files
525 - list of files
523 - commit message as a localstr
526 - commit message as a localstr
524 - dict of extra metadata
527 - dict of extra metadata
525
528
526 Unless you need to access all fields, consider calling
529 Unless you need to access all fields, consider calling
527 ``changelogrevision`` instead, as it is faster for partial object
530 ``changelogrevision`` instead, as it is faster for partial object
528 access.
531 access.
529 """
532 """
530 d = self._revisiondata(nodeorrev)
533 d = self._revisiondata(nodeorrev)
531 sidedata = self.sidedata(nodeorrev)
534 sidedata = self.sidedata(nodeorrev)
532 copy_sd = self._copiesstorage == b'changeset-sidedata'
535 copy_sd = self._copiesstorage == b'changeset-sidedata'
533 c = changelogrevision(self, d, sidedata, copy_sd)
536 c = changelogrevision(self, d, sidedata, copy_sd)
534 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
537 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
535
538
536 def changelogrevision(self, nodeorrev):
539 def changelogrevision(self, nodeorrev):
537 """Obtain a ``changelogrevision`` for a node or revision."""
540 """Obtain a ``changelogrevision`` for a node or revision."""
538 text = self._revisiondata(nodeorrev)
541 text = self._revisiondata(nodeorrev)
539 sidedata = self.sidedata(nodeorrev)
542 sidedata = self.sidedata(nodeorrev)
540 return changelogrevision(
543 return changelogrevision(
541 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
544 self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
542 )
545 )
543
546
544 def readfiles(self, nodeorrev):
547 def readfiles(self, nodeorrev):
545 """
548 """
546 short version of read that only returns the files modified by the cset
549 short version of read that only returns the files modified by the cset
547 """
550 """
548 text = self.revision(nodeorrev)
551 text = self.revision(nodeorrev)
549 if not text:
552 if not text:
550 return []
553 return []
551 last = text.index(b"\n\n")
554 last = text.index(b"\n\n")
552 l = text[:last].split(b'\n')
555 l = text[:last].split(b'\n')
553 return l[3:]
556 return l[3:]
554
557
555 def add(
558 def add(
556 self,
559 self,
557 manifest,
560 manifest,
558 files,
561 files,
559 desc,
562 desc,
560 transaction,
563 transaction,
561 p1,
564 p1,
562 p2,
565 p2,
563 user,
566 user,
564 date=None,
567 date=None,
565 extra=None,
568 extra=None,
566 ):
569 ):
567 # Convert to UTF-8 encoded bytestrings as the very first
570 # Convert to UTF-8 encoded bytestrings as the very first
568 # thing: calling any method on a localstr object will turn it
571 # thing: calling any method on a localstr object will turn it
569 # into a str object and the cached UTF-8 string is thus lost.
572 # into a str object and the cached UTF-8 string is thus lost.
570 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
573 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
571
574
572 user = user.strip()
575 user = user.strip()
573 # An empty username or a username with a "\n" will make the
576 # An empty username or a username with a "\n" will make the
574 # revision text contain two "\n\n" sequences -> corrupt
577 # revision text contain two "\n\n" sequences -> corrupt
575 # repository since read cannot unpack the revision.
578 # repository since read cannot unpack the revision.
576 if not user:
579 if not user:
577 raise error.StorageError(_(b"empty username"))
580 raise error.StorageError(_(b"empty username"))
578 if b"\n" in user:
581 if b"\n" in user:
579 raise error.StorageError(
582 raise error.StorageError(
580 _(b"username %r contains a newline") % pycompat.bytestr(user)
583 _(b"username %r contains a newline") % pycompat.bytestr(user)
581 )
584 )
582
585
583 desc = stripdesc(desc)
586 desc = stripdesc(desc)
584
587
585 if date:
588 if date:
586 parseddate = b"%d %d" % dateutil.parsedate(date)
589 parseddate = b"%d %d" % dateutil.parsedate(date)
587 else:
590 else:
588 parseddate = b"%d %d" % dateutil.makedate()
591 parseddate = b"%d %d" % dateutil.makedate()
589 if extra:
592 if extra:
590 branch = extra.get(b"branch")
593 branch = extra.get(b"branch")
591 if branch in (b"default", b""):
594 if branch in (b"default", b""):
592 del extra[b"branch"]
595 del extra[b"branch"]
593 elif branch in (b".", b"null", b"tip"):
596 elif branch in (b".", b"null", b"tip"):
594 raise error.StorageError(
597 raise error.StorageError(
595 _(b'the name \'%s\' is reserved') % branch
598 _(b'the name \'%s\' is reserved') % branch
596 )
599 )
597 sortedfiles = sorted(files.touched)
600 sortedfiles = sorted(files.touched)
598 flags = 0
601 flags = 0
599 sidedata = None
602 sidedata = None
600 if self._copiesstorage == b'changeset-sidedata':
603 if self._copiesstorage == b'changeset-sidedata':
601 if files.has_copies_info:
604 if files.has_copies_info:
602 flags |= flagutil.REVIDX_HASCOPIESINFO
605 flags |= flagutil.REVIDX_HASCOPIESINFO
603 sidedata = metadata.encode_files_sidedata(files)
606 sidedata = metadata.encode_files_sidedata(files)
604
607
605 if extra:
608 if extra:
606 extra = encodeextra(extra)
609 extra = encodeextra(extra)
607 parseddate = b"%s %s" % (parseddate, extra)
610 parseddate = b"%s %s" % (parseddate, extra)
608 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
611 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
609 text = b"\n".join(l)
612 text = b"\n".join(l)
610 rev = self.addrevision(
613 rev = self.addrevision(
611 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
614 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
612 )
615 )
613 return self.node(rev)
616 return self.node(rev)
614
617
615 def branchinfo(self, rev):
618 def branchinfo(self, rev):
616 """return the branch name and open/close state of a revision
619 """return the branch name and open/close state of a revision
617
620
618 This function exists because creating a changectx object
621 This function exists because creating a changectx object
619 just to access this is costly."""
622 just to access this is costly."""
620 return self.changelogrevision(rev).branchinfo
623 return self.changelogrevision(rev).branchinfo
621
624
622 def _nodeduplicatecallback(self, transaction, rev):
625 def _nodeduplicatecallback(self, transaction, rev):
623 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
626 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
624 #
627 #
625 # We track them in a list to preserve their order from the source bundle
628 # We track them in a list to preserve their order from the source bundle
626 duplicates = transaction.changes.setdefault(b'revduplicates', [])
629 duplicates = transaction.changes.setdefault(b'revduplicates', [])
627 duplicates.append(rev)
630 duplicates.append(rev)
@@ -1,3394 +1,3298 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 # coding: utf8
3 #
3 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 """Storage back-end for Mercurial.
10
10
11 This provides efficient delta storage with O(1) retrieve and append
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
12 and O(changes) merge between branches.
13 """
13 """
14
14
15 from __future__ import absolute_import
15 from __future__ import absolute_import
16
16
17 import binascii
17 import binascii
18 import collections
18 import collections
19 import contextlib
19 import contextlib
20 import errno
20 import errno
21 import io
21 import io
22 import os
22 import os
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullrev,
30 nullrev,
31 sha1nodeconstants,
31 sha1nodeconstants,
32 short,
32 short,
33 wdirrev,
33 wdirrev,
34 )
34 )
35 from .i18n import _
35 from .i18n import _
36 from .pycompat import getattr
36 from .pycompat import getattr
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 ALL_KINDS,
38 ALL_KINDS,
39 CHANGELOGV2,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
47 KIND_CHANGELOG,
48 REVLOGV0,
48 REVLOGV0,
49 REVLOGV1,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
56 SUPPORTED_FLAGS,
57 )
57 )
58 from .revlogutils.flagutil import (
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
66 )
67 from .thirdparty import attr
67 from .thirdparty import attr
68 from . import (
68 from . import (
69 ancestor,
69 ancestor,
70 dagop,
70 dagop,
71 error,
71 error,
72 mdiff,
72 mdiff,
73 policy,
73 policy,
74 pycompat,
74 pycompat,
75 revlogutils,
75 revlogutils,
76 templatefilters,
76 templatefilters,
77 util,
77 util,
78 )
78 )
79 from .interfaces import (
79 from .interfaces import (
80 repository,
80 repository,
81 util as interfaceutil,
81 util as interfaceutil,
82 )
82 )
83 from .revlogutils import (
83 from .revlogutils import (
84 censor,
84 censor,
85 deltas as deltautil,
85 deltas as deltautil,
86 docket as docketutil,
86 docket as docketutil,
87 flagutil,
87 flagutil,
88 nodemap as nodemaputil,
88 nodemap as nodemaputil,
89 randomaccessfile,
89 revlogv0,
90 revlogv0,
90 sidedata as sidedatautil,
91 sidedata as sidedatautil,
91 )
92 )
92 from .utils import (
93 from .utils import (
93 storageutil,
94 storageutil,
94 stringutil,
95 stringutil,
95 )
96 )
96
97
97 # blanked usage of all the names to prevent pyflakes constraints
98 # blanked usage of all the names to prevent pyflakes constraints
98 # We need these names available in the module for extensions.
99 # We need these names available in the module for extensions.
99
100
100 REVLOGV0
101 REVLOGV0
101 REVLOGV1
102 REVLOGV1
102 REVLOGV2
103 REVLOGV2
103 FLAG_INLINE_DATA
104 FLAG_INLINE_DATA
104 FLAG_GENERALDELTA
105 FLAG_GENERALDELTA
105 REVLOG_DEFAULT_FLAGS
106 REVLOG_DEFAULT_FLAGS
106 REVLOG_DEFAULT_FORMAT
107 REVLOG_DEFAULT_FORMAT
107 REVLOG_DEFAULT_VERSION
108 REVLOG_DEFAULT_VERSION
108 REVLOGV1_FLAGS
109 REVLOGV1_FLAGS
109 REVLOGV2_FLAGS
110 REVLOGV2_FLAGS
110 REVIDX_ISCENSORED
111 REVIDX_ISCENSORED
111 REVIDX_ELLIPSIS
112 REVIDX_ELLIPSIS
112 REVIDX_HASCOPIESINFO
113 REVIDX_HASCOPIESINFO
113 REVIDX_EXTSTORED
114 REVIDX_EXTSTORED
114 REVIDX_DEFAULT_FLAGS
115 REVIDX_DEFAULT_FLAGS
115 REVIDX_FLAGS_ORDER
116 REVIDX_FLAGS_ORDER
116 REVIDX_RAWTEXT_CHANGING_FLAGS
117 REVIDX_RAWTEXT_CHANGING_FLAGS
117
118
118 parsers = policy.importmod('parsers')
119 parsers = policy.importmod('parsers')
119 rustancestor = policy.importrust('ancestor')
120 rustancestor = policy.importrust('ancestor')
120 rustdagop = policy.importrust('dagop')
121 rustdagop = policy.importrust('dagop')
121 rustrevlog = policy.importrust('revlog')
122 rustrevlog = policy.importrust('revlog')
122
123
123 # Aliased for performance.
124 # Aliased for performance.
124 _zlibdecompress = zlib.decompress
125 _zlibdecompress = zlib.decompress
125
126
126 # max size of revlog with inline data
127 # max size of revlog with inline data
127 _maxinline = 131072
128 _maxinline = 131072
128 _chunksize = 1048576
129
129
130 # Flag processors for REVIDX_ELLIPSIS.
130 # Flag processors for REVIDX_ELLIPSIS.
131 def ellipsisreadprocessor(rl, text):
131 def ellipsisreadprocessor(rl, text):
132 return text, False
132 return text, False
133
133
134
134
135 def ellipsiswriteprocessor(rl, text):
135 def ellipsiswriteprocessor(rl, text):
136 return text, False
136 return text, False
137
137
138
138
139 def ellipsisrawprocessor(rl, text):
139 def ellipsisrawprocessor(rl, text):
140 return False
140 return False
141
141
142
142
143 ellipsisprocessor = (
143 ellipsisprocessor = (
144 ellipsisreadprocessor,
144 ellipsisreadprocessor,
145 ellipsiswriteprocessor,
145 ellipsiswriteprocessor,
146 ellipsisrawprocessor,
146 ellipsisrawprocessor,
147 )
147 )
148
148
149
149
150 def _verify_revision(rl, skipflags, state, node):
150 def _verify_revision(rl, skipflags, state, node):
151 """Verify the integrity of the given revlog ``node`` while providing a hook
151 """Verify the integrity of the given revlog ``node`` while providing a hook
152 point for extensions to influence the operation."""
152 point for extensions to influence the operation."""
153 if skipflags:
153 if skipflags:
154 state[b'skipread'].add(node)
154 state[b'skipread'].add(node)
155 else:
155 else:
156 # Side-effect: read content and verify hash.
156 # Side-effect: read content and verify hash.
157 rl.revision(node)
157 rl.revision(node)
158
158
159
159
160 # True if a fast implementation for persistent-nodemap is available
160 # True if a fast implementation for persistent-nodemap is available
161 #
161 #
162 # We also consider we have a "fast" implementation in "pure" python because
162 # We also consider we have a "fast" implementation in "pure" python because
163 # people using pure don't really have performance considerations (and a
164 # people using pure don't really have performance considerations (and a
164 # wheelbarrow of other slowness sources)
165 # wheelbarrow of other slowness sources)
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
165 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
166 parsers, 'BaseIndexObject'
166 parsers, 'BaseIndexObject'
167 )
167 )
168
168
169
169
170 @interfaceutil.implementer(repository.irevisiondelta)
170 @interfaceutil.implementer(repository.irevisiondelta)
171 @attr.s(slots=True)
171 @attr.s(slots=True)
172 class revlogrevisiondelta(object):
172 class revlogrevisiondelta(object):
173 node = attr.ib()
173 node = attr.ib()
174 p1node = attr.ib()
174 p1node = attr.ib()
175 p2node = attr.ib()
175 p2node = attr.ib()
176 basenode = attr.ib()
176 basenode = attr.ib()
177 flags = attr.ib()
177 flags = attr.ib()
178 baserevisionsize = attr.ib()
178 baserevisionsize = attr.ib()
179 revision = attr.ib()
179 revision = attr.ib()
180 delta = attr.ib()
180 delta = attr.ib()
181 sidedata = attr.ib()
181 sidedata = attr.ib()
182 protocol_flags = attr.ib()
182 protocol_flags = attr.ib()
183 linknode = attr.ib(default=None)
183 linknode = attr.ib(default=None)
184
184
185
185
186 @interfaceutil.implementer(repository.iverifyproblem)
186 @interfaceutil.implementer(repository.iverifyproblem)
187 @attr.s(frozen=True)
187 @attr.s(frozen=True)
188 class revlogproblem(object):
188 class revlogproblem(object):
189 warning = attr.ib(default=None)
189 warning = attr.ib(default=None)
190 error = attr.ib(default=None)
190 error = attr.ib(default=None)
191 node = attr.ib(default=None)
191 node = attr.ib(default=None)
192
192
193
193
194 def parse_index_v1(data, inline):
194 def parse_index_v1(data, inline):
195 # call the C implementation to parse the index data
195 # call the C implementation to parse the index data
196 index, cache = parsers.parse_index2(data, inline)
196 index, cache = parsers.parse_index2(data, inline)
197 return index, cache
197 return index, cache
198
198
199
199
200 def parse_index_v2(data, inline):
200 def parse_index_v2(data, inline):
201 # call the C implementation to parse the index data
201 # call the C implementation to parse the index data
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
202 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
203 return index, cache
203 return index, cache
204
204
205
205
206 def parse_index_cl_v2(data, inline):
206 def parse_index_cl_v2(data, inline):
207 # call the C implementation to parse the index data
207 # call the C implementation to parse the index data
208 assert not inline
208 assert not inline
209 from .pure.parsers import parse_index_cl_v2
209 from .pure.parsers import parse_index_cl_v2
210
210
211 index, cache = parse_index_cl_v2(data)
211 index, cache = parse_index_cl_v2(data)
212 return index, cache
212 return index, cache
213
213
214
214
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
215 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
216
216
217 def parse_index_v1_nodemap(data, inline):
217 def parse_index_v1_nodemap(data, inline):
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
218 index, cache = parsers.parse_index_devel_nodemap(data, inline)
219 return index, cache
219 return index, cache
220
220
221
221
222 else:
222 else:
223 parse_index_v1_nodemap = None
223 parse_index_v1_nodemap = None
224
224
225
225
226 def parse_index_v1_mixed(data, inline):
226 def parse_index_v1_mixed(data, inline):
227 index, cache = parse_index_v1(data, inline)
227 index, cache = parse_index_v1(data, inline)
228 return rustrevlog.MixedIndex(index), cache
228 return rustrevlog.MixedIndex(index), cache
229
229
230
230
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
231 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
232 # signed integer)
232 # signed integer)
233 _maxentrysize = 0x7FFFFFFF
233 _maxentrysize = 0x7FFFFFFF
234
234
235 PARTIAL_READ_MSG = _(
236 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
237 )
238
239 FILE_TOO_SHORT_MSG = _(
235 FILE_TOO_SHORT_MSG = _(
240 b'cannot read from revlog %s;'
236 b'cannot read from revlog %s;'
241 b' expected %d bytes from offset %d, data size is %d'
237 b' expected %d bytes from offset %d, data size is %d'
242 )
238 )
243
239
244
240
245 class revlog(object):
241 class revlog(object):
246 """
242 """
247 the underlying revision storage object
243 the underlying revision storage object
248
244
249 A revlog consists of two parts, an index and the revision data.
245 A revlog consists of two parts, an index and the revision data.
250
246
251 The index is a file with a fixed record size containing
247 The index is a file with a fixed record size containing
252 information on each revision, including its nodeid (hash), the
248 information on each revision, including its nodeid (hash), the
253 nodeids of its parents, the position and offset of its data within
249 nodeids of its parents, the position and offset of its data within
254 the data file, and the revision it's based on. Finally, each entry
250 the data file, and the revision it's based on. Finally, each entry
255 contains a linkrev entry that can serve as a pointer to external
251 contains a linkrev entry that can serve as a pointer to external
256 data.
252 data.
257
253
258 The revision data itself is a linear collection of data chunks.
254 The revision data itself is a linear collection of data chunks.
259 Each chunk represents a revision and is usually represented as a
255 Each chunk represents a revision and is usually represented as a
260 delta against the previous chunk. To bound lookup time, runs of
256 delta against the previous chunk. To bound lookup time, runs of
261 deltas are limited to about 2 times the length of the original
257 deltas are limited to about 2 times the length of the original
262 version data. This makes retrieval of a version proportional to
258 version data. This makes retrieval of a version proportional to
263 its size, or O(1) relative to the number of revisions.
259 its size, or O(1) relative to the number of revisions.
264
260
265 Both pieces of the revlog are written to in an append-only
261 Both pieces of the revlog are written to in an append-only
266 fashion, which means we never need to rewrite a file to insert or
262 fashion, which means we never need to rewrite a file to insert or
267 remove data, and can use some simple techniques to avoid the need
263 remove data, and can use some simple techniques to avoid the need
268 for locking while reading.
264 for locking while reading.
269
265
270 If checkambig, indexfile is opened with checkambig=True at
266 If checkambig, indexfile is opened with checkambig=True at
271 writing, to avoid file stat ambiguity.
267 writing, to avoid file stat ambiguity.
272
268
273 If mmaplargeindex is True, and an mmapindexthreshold is set, the
269 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 index will be mmapped rather than read if it is larger than the
270 index will be mmapped rather than read if it is larger than the
275 configured threshold.
271 configured threshold.
276
272
277 If censorable is True, the revlog can have censored revisions.
273 If censorable is True, the revlog can have censored revisions.
278
274
279 If `upperboundcomp` is not None, this is the expected maximal gain from
275 If `upperboundcomp` is not None, this is the expected maximal gain from
280 compression for the data content.
276 compression for the data content.
281
277
282 `concurrencychecker` is an optional function that receives 3 arguments: a
278 `concurrencychecker` is an optional function that receives 3 arguments: a
283 file handle, a filename, and an expected position. It should check whether
279 file handle, a filename, and an expected position. It should check whether
284 the current position in the file handle is valid, and log/warn/fail (by
280 the current position in the file handle is valid, and log/warn/fail (by
285 raising).
281 raising).
286
282
287 See mercurial/revlogutils/constants.py for details about the content of an
283 See mercurial/revlogutils/constants.py for details about the content of an
288 index entry.
284 index entry.
289 """
285 """
290
286
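As a hedged illustration of the retrieval API described above (assuming a Mercurial repository in the current directory):

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    cl = repo.changelog                  # a revlog subclass
    node = cl.node(0)                    # nodeid of revision 0
    text = cl.revision(node)             # O(size of revision), not O(history)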
291 _flagserrorclass = error.RevlogError
287 _flagserrorclass = error.RevlogError
292
288
293 def __init__(
289 def __init__(
294 self,
290 self,
295 opener,
291 opener,
296 target,
292 target,
297 radix,
293 radix,
298 postfix=None, # only exist for `tmpcensored` now
294 postfix=None, # only exist for `tmpcensored` now
299 checkambig=False,
295 checkambig=False,
300 mmaplargeindex=False,
296 mmaplargeindex=False,
301 censorable=False,
297 censorable=False,
302 upperboundcomp=None,
298 upperboundcomp=None,
303 persistentnodemap=False,
299 persistentnodemap=False,
304 concurrencychecker=None,
300 concurrencychecker=None,
305 trypending=False,
301 trypending=False,
306 ):
302 ):
307 """
303 """
308 create a revlog object
304 create a revlog object
309
305
310 opener is a function that abstracts the file opening operation
306 opener is a function that abstracts the file opening operation
311 and can be used to implement COW semantics or the like.
307 and can be used to implement COW semantics or the like.
312
308
313 `target`: a (KIND, ID) tuple that identify the content stored in
309 `target`: a (KIND, ID) tuple that identify the content stored in
314 this revlog. It help the rest of the code to understand what the revlog
310 this revlog. It help the rest of the code to understand what the revlog
315 is about without having to resort to heuristic and index filename
311 is about without having to resort to heuristic and index filename
316 analysis. Note: that this must be reliably be set by normal code, but
312 analysis. Note: that this must be reliably be set by normal code, but
317 that test, debug, or performance measurement code might not set this to
313 that test, debug, or performance measurement code might not set this to
318 accurate value.
314 accurate value.
319 """
315 """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

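For orientation, a runnable sketch of the two `target` assertions above; the kind values here are illustrative stand-ins (the real ones live in mercurial/revlogutils/constants.py):

ALL_KINDS = {b'changelog', b'manifestlog', b'filelog'}  # illustrative stand-ins

def check_target(target):
    # Mirrors the two assertions in __init__: a (KIND, ID) pair whose
    # kind is one the rest of the code knows how to interpret.
    assert len(target) == 2
    assert target[0] in ALL_KINDS
    return target

check_target((b'filelog', b'data/foo.txt'))
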
    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
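
The `chunkcachesize` validation above relies on the usual bitwise power-of-two test; a minimal runnable sketch:

def is_power_of_two(n):
    # A positive integer is a power of two iff exactly one bit is set;
    # n - 1 flips every bit at or below that bit, so the AND is zero.
    return n > 0 and n & (n - 1) == 0

assert is_power_of_two(65536)      # the default chunk cache size
assert not is_power_of_two(65535)  # rejected with a RevlogError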

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
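
A standalone sketch of the same read-vs-mmap decision, assuming a plain filesystem path rather than a Mercurial vfs opener (corner cases such as empty or missing files are omitted):

import mmap
import os

def read_possibly_mmapped(path, threshold):
    # Small files are read outright; files at or above the threshold
    # are memory-mapped so the OS can page the index in lazily.
    with open(path, 'rb') as fp:
        if threshold is not None and os.fstat(fp.fileno()).st_size >= threshold:
            return memoryview(mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ))
        return fp.read()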

    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            entry_data = b''
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

            self._format_flags = header & ~0xFFFF
            self._format_version = header & 0xFFFF

            supported_flags = SUPPORTED_FLAGS.get(self._format_version)
            if supported_flags is None:
                msg = _(b'unknown version (%d) in revlog %s')
                msg %= (self._format_version, self.display_id)
                raise error.RevlogError(msg)
            elif self._format_flags & ~supported_flags:
                msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                display_flag = self._format_flags >> 16
                msg %= (display_flag, self._format_version, self.display_id)
                raise error.RevlogError(msg)

            features = FEATURES_BY_VERSION[self._format_version]
            self._inline = features[b'inline'](self._format_flags)
            self._generaldelta = features[b'generaldelta'](self._format_flags)
            self.hassidedata = features[b'sidedata']

            if not features[b'docket']:
                self._indexfile = entry_point
                index_data = entry_data
            else:
                self._docket_file = entry_point
                if self._initempty:
                    self._docket = docketutil.default_docket(self, header)
                else:
                    self._docket = docketutil.parse_docket(
                        self, entry_data, use_pending=self._trypending
                    )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
-           index, _chunkcache = d
+           index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
-       self.index, self._chunkcache = d
-       if not self._chunkcache:
-           self._chunkclear()
+       self.index = index
+       self._segmentfile = randomaccessfile.randomaccessfile(
+           self.opener,
+           (self._indexfile if self._inline else self._datafile),
+           self._chunkcachesize,
+           chunkcache,
+       )
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}
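
The version/flag split performed in `_loadindex` packs both into one 32-bit header word; a runnable illustration (flag values match the REVLOGV1 constants):

REVLOGV1 = 1                 # low 16 bits: format version
FLAG_INLINE_DATA = 1 << 16   # high 16 bits: per-format feature flags
FLAG_GENERALDELTA = 1 << 17

header = REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA
assert header & 0xFFFF == REVLOGV1       # what _format_version keeps
assert (header & ~0xFFFF) >> 16 == 0b11  # what _format_flags displays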

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
-   def _datareadfp(self, existingfp=None):
-       """file object suitable to read data"""
-       # Use explicit file handle, if given.
-       if existingfp is not None:
-           yield existingfp
-
-       # Use a file handle being actively used for writes, if available.
-       # There is some danger to doing this because reads will seek the
-       # file. However, _writeentry() performs a SEEK_END before all writes,
-       # so we should be safe.
-       elif self._writinghandles:
-           if self._inline:
-               yield self._writinghandles[0]
-           else:
-               yield self._writinghandles[1]
-
-       # Otherwise open a new file handle.
-       else:
-           if self._inline:
-               func = self._indexfp
-           else:
-               func = self._datafp
-           with func() as fp:
-               yield fp
-
-   @contextlib.contextmanager
    def _sidedatareadfp(self):
        """file object suitable to read sidedata"""
        if self._writinghandles:
            yield self._writinghandles[2]
        else:
            with self.opener(self._sidedatafile) as fp:
                yield fp
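
The removed `_datareadfp` (its logic now lives behind the `randomaccessfile` object wired up in `_loadindex`) and the surviving `_sidedatareadfp` share one pattern: borrow an already-open handle when possible, otherwise open and reliably close a fresh one. A generic sketch of that pattern:

import contextlib

@contextlib.contextmanager
def borrowed_or_fresh_handle(existing, open_fn):
    # Yield a caller-supplied handle untouched, or open a new one and
    # guarantee it is closed when the read is done.
    if existing is not None:
        yield existing
    else:
        with open_fn() as fp:
            yield fp

# usage: with borrowed_or_fresh_handle(None, lambda: open('x.d', 'rb')) as fp: ...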

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and
        # two clients could have the same revlog node with different flags
        # (i.e. different rawtext contents) and the delta could be
        # incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
-       self._chunkcache = (0, b'')
+       self._segmentfile.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
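
The comment above describes the packing that `start()` and `flags()` undo; concretely:

entry0 = (1234 << 16) | 0x0001    # offset 1234 with one flag bit set
assert entry0 >> 16 == 1234       # start(): drop the 2 flag bytes
assert entry0 & 0xFFFF == 0x0001  # flags(): keep only the 2 flag bytes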

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
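
A pure-Python sketch of the chain walk used by both `_chaininfo` and the fallback path of `_deltachain` above; `base_of` is a hypothetical accessor for the index's base-revision field:

def delta_chain(base_of, rev, generaldelta):
    # Follow base pointers (generaldelta) or simply rev - 1 (legacy
    # layout) until a revision is its own base, i.e. a full snapshot.
    chain = []
    while base_of(rev) != rev:
        chain.append(rev)
        rev = base_of(rev) if generaldelta else rev - 1
    chain.append(rev)
    chain.reverse()
    return chain

# bases: rev 0 is a snapshot; revs 1 and 2 delta against their base
assert delta_chain({0: 0, 1: 0, 2: 1}.__getitem__, 2, True) == [0, 1, 2]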

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)
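
Constraints (1) and (2) above amount to a set difference between two inclusive ancestor sets. A tiny runnable reference sketch (`parents` is a hypothetical rev -> parent-revs callable, with -1 standing in for the null revision):

def missing_ancestors(parents, common, heads):
    # Ancestors of heads (inclusive) minus ancestors of common (inclusive).
    def ancestors(revs):
        seen = set()
        stack = list(revs)
        while stack:
            r = stack.pop()
            if r >= 0 and r not in seen:
                seen.add(r)
                stack.extend(parents(r))
        return seen
    return sorted(ancestors(heads) - ancestors(common))

# graph 0 <- 1 <- 2 and 1 <- 3; common=[1], heads=[2, 3] -> [2, 3]
assert missing_ancestors(
    {0: [-1], 1: [0], 2: [1], 3: [1]}.__getitem__, [1], [2, 3]
) == [2, 3]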

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]
1178
1152
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant.  (We seeded the descendants set with the
                # roots up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

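    # Illustrative sketch (assumes ``rl`` is a revlog and ``a``, ``b`` are
    # stored binary node ids with ``a`` an ancestor of ``b``):
    #
    #     nodes, outroots, outheads = rl.nodesbetween(roots=[a], heads=[b])
    #     # nodes is topologically sorted: nodes[0] == a, nodes[-1] == b
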
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

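    # Illustrative sketch (``rl`` assumed): with no argument the C/Rust
    # index implementation is preferred, falling back to the pure-Python
    # ``_headrevs`` below; with a subset, e.g. ``rl.headrevs([0, 1, 2])``,
    # only heads within those revisions are returned.
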
    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

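    # Worked example of the marking scheme above: for a linear history
    # 0 <- 1 <- 2, every revision first marks itself (ishead[r] = 1) and
    # then clears its parents' slots, so slots 0 and 1 end up cleared and
    # only [2] is returned.  The list has one extra slot so that a null
    # parent (nullrev == -1) harmlessly clears ishead[-1].
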
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

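    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     rl.heads()            # every node without children
    #     rl.heads(start=node)  # only heads descending from ``node``
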
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

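    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     kids = rl.children(node)  # [] when ``node`` is a head
    #
    # Passing nullid lists the revlog's root revisions, via the
    # ``p == nullrev`` branch above.
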
    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

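    # Illustrative sketch of the fast paths above (``rl`` assumed, plain
    # revision numbers):
    #
    #     rl.isancestorrev(nullrev, r)  # True: nullrev precedes everything
    #     rl.isancestorrev(r, r)        # True: a rev is its own ancestor
    #     rl.isancestorrev(5, 3)        # False without any DAG walk: 5 > 3
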
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

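    # Illustrative sketch (``rl`` and stored nodes ``a``, ``b`` assumed):
    #
    #     gca = rl.ancestor(a, b)  # nullid when nothing is in common
    #
    # Ties between several "best" ancestors are broken by picking the
    # smallest binary node, keeping repeated calls deterministic.
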
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

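    # Illustrative sketch (``rl`` assumed; ``id`` is a hex prefix as bytes):
    #
    #     node = rl._partialmatch(b'1234ab')  # a node, or None when absent
    #
    # An ambiguous prefix raises AmbiguousPrefixLookupError, and a prefix
    # made only of 'f's that matches nothing raises WdirUnsupported.
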
    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

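    # Illustrative sketch (``rl`` assumed):
    #
    #     rl.lookup(b'0')   # b'0' parses as str(rev) -> node of revision 0
    #     rl.lookup(node)   # a full binary node id is returned as-is
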
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

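    # Illustrative sketch (``rl`` and a stored ``node`` assumed):
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl.lookup(prefix) == node  # the prefix is unambiguous
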
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

-    def _cachesegment(self, offset, data):
-        """Add a segment to the revlog cache.
-
-        Accepts an absolute offset and the data that is at that location.
-        """
-        o, d = self._chunkcache
-        # try to add to existing cache
-        if o + len(d) == offset and len(d) + len(data) < _chunksize:
-            self._chunkcache = o, d + data
-        else:
-            self._chunkcache = offset, data
-
-    def _readsegment(self, offset, length, df=None):
-        """Load a segment of raw data from the revlog.
-
-        Accepts an absolute offset, length to read, and an optional existing
-        file handle to read from.
-
-        If an existing file handle is passed, it will be seeked and the
-        original seek position will NOT be restored.
-
-        Returns a str or buffer of raw byte data.
-
-        Raises if the requested number of bytes could not be read.
-        """
-        # Cache data both forward and backward around the requested
-        # data, in a fixed size window. This helps speed up operations
-        # involving reading the revlog backwards.
-        cachesize = self._chunkcachesize
-        realoffset = offset & ~(cachesize - 1)
-        reallength = (
-            (offset + length + cachesize) & ~(cachesize - 1)
-        ) - realoffset
-        with self._datareadfp(df) as df:
-            df.seek(realoffset)
-            d = df.read(reallength)
-
-        self._cachesegment(realoffset, d)
-        if offset != realoffset or reallength != length:
-            startoffset = offset - realoffset
-            if len(d) - startoffset < length:
-                filename = self._indexfile if self._inline else self._datafile
-                got = len(d) - startoffset
-                m = PARTIAL_READ_MSG % (filename, length, offset, got)
-                raise error.RevlogError(m)
-            return util.buffer(d, startoffset, length)
-
-        if len(d) < length:
-            filename = self._indexfile if self._inline else self._datafile
-            got = len(d)
-            m = PARTIAL_READ_MSG % (filename, length, offset, got)
-            raise error.RevlogError(m)
-
-        return d
-
-    def _getsegment(self, offset, length, df=None):
-        """Obtain a segment of raw data from the revlog.
-
-        Accepts an absolute offset, length of bytes to obtain, and an
-        optional file handle to the already-opened revlog. If the file
-        handle is used, its original seek position will not be preserved.
-
-        Requests for data may be returned from a cache.
-
-        Returns a str or a buffer instance of raw byte data.
-        """
-        o, d = self._chunkcache
-        l = len(d)
-
-        # is it in the cache?
-        cachestart = offset - o
-        cacheend = cachestart + length
-        if cachestart >= 0 and cacheend <= l:
-            if cachestart == 0 and cacheend == l:
-                return d  # avoid a copy
-            return util.buffer(d, cachestart, cacheend - cachestart)
-
-        return self._readsegment(offset, length, df=df)
-
    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

-        return start, self._getsegment(start, length, df=df)
+        return start, self._segmentfile.read_chunk(start, length, df)

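    # Worked example for the inline layout above (hypothetical numbers,
    # assuming a 64-byte index entry size): for rev 2 with start(2) == 100
    # and length(2) == 30, the data segment of an inline revlog begins at
    # 100 + (2 + 1) * 64 == 292 and ends at 292 + 30 == 322, because each
    # revision's index entry is stored immediately before its data.
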
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

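    # Illustrative sketch (``rl`` assumed): fetching a whole delta chain in
    # one read, then slicing per revision:
    #
    #     chain, _stopped = rl._deltachain(rev)
    #     bins = rl._chunks(chain)  # one decompressed chunk per revision
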
-    def _chunkclear(self):
-        """Clear the raw chunk cache."""
-        self._chunkcache = (0, b'')
-
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

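    # Illustrative note: with sparse-revlog, a snapshot is either a full
    # text (its delta base is nullrev or itself) or a delta whose base is
    # not one of its parents but is itself a snapshot, hence the recursive
    # call above.
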
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

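    # Illustrative sketch (``rl`` assumed): when rev2 is already stored as
    # a delta against rev1 the stored chunk is reused verbatim; otherwise a
    # fresh binary diff is computed from the two raw texts:
    #
    #     delta = rl.revdiff(rev1, rev2)
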
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

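    # Illustrative sketch of the reconstruction above (``rl`` and a stored
    # ``node`` assumed):
    #
    #     rev, rawtext, validated = rl._rawtext(node, None)
    #     # rawtext is mdiff.patches(basetext, deltas) unless it came
    #     # straight from the cache, in which case validated is True
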
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
-                m = PARTIAL_READ_MSG % (filename, length, offset, got)
+                m = randomaccessfile.PARTIAL_READ_MSG % (
+                    filename,
+                    length,
+                    offset,
+                    got,
+                )
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

                tr.replace(self._indexfile, trindex * self.index.entry_size)
                nodemaputil.setup_persistent_nodemap(tr, self)
                self._segmentfile = randomaccessfile.randomaccessfile(
                    self.opener,
                    self._datafile,
                    self._chunkcachesize,
                )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

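    # Layout sketch of the conversion performed above (illustrative, not new
    # behavior): an inline revlog interleaves each data chunk with its index
    # entry in the ``.i`` file,
    #
    #     .i: [entry 0][data 0][entry 1][data 1] ...
    #
    # while the split form rewrites this as a fixed-stride index plus a
    # separate data file:
    #
    #     .i: [entry 0][entry 1] ...    .d: [data 0][data 1] ...
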
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                    transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                    transaction.add(
                        self._sidedatafile, self._docket.sidedata_end
                    )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # expose all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self._inline else dfh
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing references to
                # potentially unflushed data content.
                if ifh is not None:
                    ifh.close()

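    # Illustrative usage (hypothetical caller; ``rl`` and ``tr`` stand for a
    # revlog and an open transaction): all mutation happens under this
    # context manager so file handles and docket offsets stay consistent:
    #
    #     with rl._writing(tr):
    #         rl.addrevision(text, tr, linkrev, p1node, p2node)
    #
    # Re-entering is cheap: if handles are already open, _writing() simply
    # yields without reopening anything.
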
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

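    # A note on idempotency (it follows from the get_rev() check above):
    # adding the same content twice returns the existing revision number
    # rather than storing a duplicate, e.g.
    #
    #     r1 = rl.addrevision(text, tr, link, p1, p2)  # rl, tr hypothetical
    #     r2 = rl.addrevision(text, tr, link, p1, p2)
    #     assert r1 == r2
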
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

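    # Header convention used by compress() (and consumed by decompress()
    # below): the first element of the returned pair is a one-byte prefix
    # for the stored chunk:
    #
    #   b''  - payload is engine-compressed and self-identifying (e.g. b'x'
    #          for zlib), or already starts with b'\0' and is stored raw
    #   b'u' - payload is literal uncompressed data; the prefix is stripped
    #          on read
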
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

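    # Round-trip sketch (assuming ``rl`` is a revlog instance): the pair
    # returned by compress(), once concatenated, is exactly what
    # decompress() accepts:
    #
    #     header, payload = rl.compress(chunk)
    #     assert bytes(rl.decompress(header + payload)) == chunk
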
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

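    # Summary of the compression modes recorded in the index entry above:
    #
    #   COMP_MODE_PLAIN   - chunk is stored verbatim; nothing to strip
    #   COMP_MODE_DEFAULT - compressed with the docket's default engine; the
    #                       engine header is elided from storage
    #   COMP_MODE_INLINE  - self-describing chunk whose first byte routes it
    #                       to a decompressor (see decompress() above)
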
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

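    # Shape of each element consumed from ``deltas`` above, as unpacked at
    # the top of the loop:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # ``delta`` is a binary patch against ``deltabase``, which must already
    # be known to the revlog (possibly added earlier in the same group).
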
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._segmentfile.clear_cache()

        del self.index[rev:-1]

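    # Worked example (a sketch): with linkrevs [0, 3, 1] and minlink=2, the
    # first revision that must go is rev 1, so the revlog is truncated to a
    # single revision; rev 2 (linkrev 1) is collateral damage that the
    # caller is expected to have saved and to re-add afterwards.
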
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
                dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

2977 def clone(
2881 def clone(
2978 self,
2882 self,
2979 tr,
2883 tr,
2980 destrevlog,
2884 destrevlog,
2981 addrevisioncb=None,
2885 addrevisioncb=None,
2982 deltareuse=DELTAREUSESAMEREVS,
2886 deltareuse=DELTAREUSESAMEREVS,
2983 forcedeltabothparents=None,
2887 forcedeltabothparents=None,
2984 sidedata_helpers=None,
2888 sidedata_helpers=None,
2985 ):
2889 ):
2986 """Copy this revlog to another, possibly with format changes.
2890 """Copy this revlog to another, possibly with format changes.
2987
2891
        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. By default, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
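
        A minimal illustrative call, assuming ``rl`` and ``dest`` are open
        revlogs and ``tr`` is an active transaction::

            rl.clone(tr, dest, deltareuse=rl.DELTAREUSESAMEREVS)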
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
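            # Map the requested policy onto the two lazy-delta knobs:
            # DELTAREUSEALWAYS trusts both the cached delta and its base,
            # DELTAREUSESAMEREVS reuses a delta only when the destination
            # would pick the same revisions, and DELTAREUSENEVER recomputes
            # everything.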
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
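            # Index entry layout, as used below: entry[0] packs the data
            # offset and flags, entry[4] is the link revision, entry[5] and
            # entry[6] are the parent revisions, and entry[7] is the node id.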
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
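                    # `new_flags` is a pair of (flags to add, flags to
                    # remove) computed by the sidedata helpers.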
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            censor.v1_censor(self, tr, censornode, tombstone)
        else:
            # revlog v2
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
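
        A minimal illustrative driver, assuming ``rl`` is an open revlog
        (only the ``state`` keys read by this method are shown)::

            state = {b'expectedversion': 1, b'erroroncensored': True}
            for problem in rl.verifyintegrity(state):
                print(problem.warning or problem.error)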
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
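        # The returned dict has one key per boolean argument that was set,
        # e.g. storageinfo(revisionscount=True, trackedsize=True) returns
        # {b'revisionscount': ..., b'trackedsize': ...}.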
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

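                # Decide how this sidedata chunk is stored: uncompressed
                # (COMP_MODE_PLAIN), compressed with the docket's default
                # compression header (COMP_MODE_DEFAULT), or with the
                # compression header kept inline with the data
                # (COMP_MODE_INLINE).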
                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        # slice rather than index so the comparison operates
                        # on a bytes object under Python 3
                        and comp_sidedata[0:1] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0:1]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
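                # entry[8] and entry[9] hold the sidedata offset and length
                # already recorded for this revision.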
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)