revlog: create an iteration of a _InnerRevlog object within the revlog...
marmoute
r51979:c3748f38 default
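The change routes file access through a new inner object: state that used to
live directly on the revlog (the segment files and their opener) now lives on
a nested _InnerRevlog, and call sites assign through `self._inner` instead. A
minimal sketch of that delegation pattern, with hypothetical names rather than
the actual Mercurial classes:

    class _Inner:
        """Owns file-access state on behalf of the outer object."""

        def __init__(self, opener):
            self.opener = opener


    class Outer:
        def __init__(self, opener):
            self._inner = _Inner(opener)

        def swap_opener(self, new_opener):
            # callers now reach through the inner layer, as in the diff below
            self._inner.opener = new_opener

Keeping all file handling behind one attribute gives the revlog a single
boundary that can later be reimplemented (for instance in Rust, as the
_InnerRevlog docstring below explains) without touching every call site.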
mercurial/changelog.py
@@ -1,638 +1,641 b''
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from .node import (
    bin,
    hex,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import (
    constants as revlog_constants,
    flagutil,
)

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender:
    """the changelog index must be updated last on disk, so we use this class
    to delay writes to it"""

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)


class _divertopener:
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


class _delayopener:
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def __init__(self, opener, target, buf):
        self._opener = opener
        self._target = target
        self._buf = buf

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        assert not kwargs
        return appender(self._opener, name, mode, self._buf)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


@attr.s
class _changelogrevision:
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib()
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
    branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))


class changelogrevision:
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, cl, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])

    @property
    def branchinfo(self):
        extra = self.extra
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False, concurrencychecker=None):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.

        ``concurrencychecker`` will be passed to the revlog init function, see
        the documentation there.
        """
        revlog.revlog.__init__(
            self,
            opener,
            target=(revlog_constants.KIND_CHANGELOG, None),
            radix=b'00changelog',
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
            concurrencychecker=concurrencychecker,
            trypending=trypending,
        )

        if self._initempty and (self._format_version == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self._format_flags &= ~revlog.FLAG_GENERALDELTA
            self.delta_config.general_delta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def _write_docket(self, tr):
        if not self._delayed:
            super(changelog, self)._write_docket(tr)

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""
        if self._docket is None and not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self._indexfile + b'.a'):
                    self._realopener.unlink(self._indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self._indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self._indexfile, self._delaybuf
                )
-            self._segmentfile.opener = self.opener
-            self._segmentfile_sidedata.opener = self.opener
+            self._inner.opener = self.opener
+            self._inner._segmentfile.opener = self.opener
+            self._inner._segmentfile_sidedata.opener = self.opener
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
-        self._segmentfile.opener = self.opener
-        self._segmentfile_sidedata.opener = self.opener
+        self._inner.opener = self.opener
+        self._inner._segmentfile.opener = self.opener
+        self._inner._segmentfile_sidedata.opener = self.opener
        # move redirected index data back into place
        if self._docket is not None:
            self._write_docket(tr)
        elif self._divert:
            assert not self._delaybuf
            tmpname = self._indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self._indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self._indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr, side_write=False)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._docket:
            return self._docket.write(tr, pending=True)
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self._indexfile)
            pendingfilename = self._indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self._indexfile)
-            self._segmentfile.opener = self.opener
-            self._segmentfile_sidedata.opener = self.opener
+            self._inner.opener = self.opener
+            self._inner._segmentfile.opener = self.opener
+            self._inner._segmentfile_sidedata.opener = self.opener

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, side_write=True):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)

    def read(self, nodeorrev):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        copy_sd = self._copiesstorage == b'changeset-sidedata'
        c = changelogrevision(self, d, sidedata, copy_sd)
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        return changelogrevision(
            self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, nodeorrev):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(nodeorrev)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            if files.has_copies_info:
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        rev = self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )
        return self.node(rev)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        return self.changelogrevision(rev).branchinfo

    def _nodeduplicatecallback(self, transaction, rev):
        # keep track of revisions that got "re-added", e.g. unbundle of known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(rev)
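For context on the unchanged machinery above: `appender` presents one virtual
byte stream over the already-written file plus an in-memory list of pending
chunks, which is what lets the changelog delay index writes. A runnable,
self-contained sketch of that read-spanning idea (hypothetical name, much
simplified from the real class):

    import io


    class BufferedAppender:
        """Reads span the real file and a pending in-memory buffer."""

        def __init__(self, fp, buf):
            self.fp = fp
            self.size = fp.seek(0, io.SEEK_END)  # length of the on-disk part
            self.buf = buf                       # pending, unflushed writes
            self.offset = 0                      # virtual stream position

        def write(self, data):
            self.buf.append(bytes(data))
            self.offset += len(data)

        def read(self, count):
            ret = b""
            if self.offset < self.size:          # served by the real file
                self.fp.seek(self.offset)
                s = self.fp.read(min(count, self.size - self.offset))
                self.offset += len(s)
                count -= len(s)
                ret = s
            if count > 0:                        # remainder from the buffer
                pending = b"".join(self.buf)
                doff = self.offset - self.size
                s = pending[doff : doff + count]
                self.offset += len(s)
                ret += s
            return ret


    app = BufferedAppender(io.BytesIO(b"on-disk"), [])
    app.write(b"+pending")
    app.offset = 0                               # rewind the virtual position
    assert app.read(64) == b"on-disk+pending"

Other readers opening the index directly never see the buffered tail, which is
exactly the "delay visibility of index updates" behavior `delayupdate` needs.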
mercurial/revlog.py
@@ -1,3796 +1,3901 b''
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)

# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration value about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "splitted" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are deltas encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration value about how new delta are computed

    Some attributes are duplicated from DataConfig to help having each object
    self-contained.
    """

    # can delta be encoded against arbitrary bases.
    general_delta = attr.ib(default=False)
    # Allow sparse writing of the revlog data
    sparse_revlog = attr.ib(default=False)
    # maximum length of a delta chain
    max_chain_len = attr.ib(default=None)
    # Maximum distance between delta chain base start and end
    max_deltachain_span = attr.ib(default=-1)
    # If `upper_bound_comp` is not None, this is the expected maximal gain from
    # compression for the data content.
    upper_bound_comp = attr.ib(default=None)
    # Should we try a delta against both parents
    delta_both_parents = attr.ib(default=True)
    # Test delta base candidate groups by chunks of this maximal size.
    candidate_group_chunk_size = attr.ib(default=0)
    # Should we display debug information about delta computation
    debug_delta = attr.ib(default=False)
    # trust incoming delta by default
    lazy_delta = attr.ib(default=True)
    # trust the base of incoming delta by default
    lazy_delta_base = attr.ib(default=False)


+class _InnerRevlog:
+    """An inner layer of the revlog object
+
+    That layer exists to be able to delegate some operations to Rust; its
+    boundaries are arbitrary and based on what we can delegate to Rust.
+    """
346
347 def __init__(
348 self,
349 opener,
350 index,
351 index_file,
352 data_file,
353 sidedata_file,
354 inline,
355 data_config,
356 chunk_cache,
357 ):
358 self.opener = opener
359 self.index = index
360
361 self.index_file = index_file
362 self.data_file = data_file
363 self.sidedata_file = sidedata_file
364 self.inline = inline
365 self.data_config = data_config
366
367 # index
368
369 # 3-tuple of file handles being used for active writing.
370 self._writinghandles = None
371
372 self._segmentfile = randomaccessfile.randomaccessfile(
373 self.opener,
374 (self.index_file if self.inline else self.data_file),
375 self.data_config.chunk_cache_size,
376 chunk_cache,
377 )
378 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
379 self.opener,
380 self.sidedata_file,
381 self.data_config.chunk_cache_size,
382 )
383
384 # Derived from index values.
385
386 def start(self, rev):
387 """the offset of the data chunk for this revision"""
388 return int(self.index[rev][0] >> 16)
389
390 def length(self, rev):
391 """the length of the data chunk for this revision"""
392 return self.index[rev][1]
393
394 def end(self, rev):
395 """the end of the data chunk for this revision"""
396 return self.start(rev) + self.length(rev)
397
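The shift in start() reflects how the first index field packs the data
offset and the 16-bit flag word into one integer. A worked example with
made-up values (editorial sketch):

    offset_flags = (4096 << 16) | 0x0001  # offset 4096, one flag bit set
    assert offset_flags >> 16 == 4096     # what start() extracts
    assert offset_flags & 0xFFFF == 0x0001  # the flag half of the field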
398 @contextlib.contextmanager
399 def reading(self):
400 """Context manager that keeps data and sidedata files open for reading"""
401 if len(self.index) == 0:
402 yield # nothing to be read
403 else:
404 with self._segmentfile.reading():
405 with self._segmentfile_sidedata.reading():
406 yield
407
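Hypothetical usage of the context manager above, batching several chunk
reads under a single pair of open handles (`inner` and `process` are
placeholders for illustration):

    with inner.reading():
        # the data and sidedata handles stay open across the loop
        # instead of being reopened for every revision
        for rev in range(len(inner.index)):
            process(inner.start(rev), inner.length(rev))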
408 @property
409 def is_writing(self):
410 """True is a writing context is open"""
411 return self._writinghandles is not None
412
413 @contextlib.contextmanager
414 def writing(self, transaction, data_end=None, sidedata_end=None):
415 """Open the revlog files for writing
416
417 Adding content to a revlog should be done within such a context.
418 """
419 if self.is_writing:
420 yield
421 else:
422 ifh = dfh = sdfh = None
423 try:
424 r = len(self.index)
425 # opening the data file.
426 dsize = 0
427 if r:
428 dsize = self.end(r - 1)
429 dfh = None
430 if not self.inline:
431 try:
432 dfh = self.opener(self.data_file, mode=b"r+")
433 if data_end is None:
434 dfh.seek(0, os.SEEK_END)
435 else:
436 dfh.seek(data_end, os.SEEK_SET)
437 except FileNotFoundError:
438 dfh = self.opener(self.data_file, mode=b"w+")
439 transaction.add(self.data_file, dsize)
440 if self.sidedata_file is not None:
441 assert sidedata_end is not None
442 # revlog-v2 does not inline, help Pytype
443 assert dfh is not None
444 try:
445 sdfh = self.opener(self.sidedata_file, mode=b"r+")
446 dfh.seek(sidedata_end, os.SEEK_SET)
447 except FileNotFoundError:
448 sdfh = self.opener(self.sidedata_file, mode=b"w+")
449 transaction.add(self.sidedata_file, sidedata_end)
450
451 # opening the index file.
452 isize = r * self.index.entry_size
453 ifh = self.__index_write_fp()
454 if self.inline:
455 transaction.add(self.index_file, dsize + isize)
456 else:
457 transaction.add(self.index_file, isize)
458 # expose all file handles for writing.
459 self._writinghandles = (ifh, dfh, sdfh)
460 self._segmentfile.writing_handle = ifh if self.inline else dfh
461 self._segmentfile_sidedata.writing_handle = sdfh
462 yield
463 finally:
464 self._writinghandles = None
465 self._segmentfile.writing_handle = None
466 self._segmentfile_sidedata.writing_handle = None
467 if dfh is not None:
468 dfh.close()
469 if sdfh is not None:
470 sdfh.close()
471 # close the index file last, to avoid exposing references to
472 # potentially unflushed data content.
473 if ifh is not None:
474 ifh.close()
475
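A hedged usage sketch for the writing context, assuming a transaction
object `tr` obtained elsewhere. Nesting is safe because an already open
writing context simply yields, as is_writing shows above:

    with inner.writing(tr):
        with inner.writing(tr):  # no-op re-entry, handles are reused
            pass  # append revisions here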
476 def __index_write_fp(self, index_end=None):
477 """internal method to open the index file for writing
478
479 You should not use this directly; use `_writing` instead
480 """
481 try:
482 f = self.opener(
483 self.index_file,
484 mode=b"r+",
485 checkambig=self.data_config.check_ambig,
486 )
487 if index_end is None:
488 f.seek(0, os.SEEK_END)
489 else:
490 f.seek(index_end, os.SEEK_SET)
491 return f
492 except FileNotFoundError:
493 return self.opener(
494 self.index_file,
495 mode=b"w+",
496 checkambig=self.data_config.check_ambig,
497 )
498
499 def __index_new_fp(self):
500 """internal method to create a new index file for writing
501
502 You should not use this unless you are upgrading from an inline revlog
503 """
504 return self.opener(
505 self.index_file,
506 mode=b"w",
507 checkambig=self.data_config.check_ambig,
508 atomictemp=True,
509 )
510
511
340 class revlog:
512 class revlog:
341 """
513 """
342 the underlying revision storage object
514 the underlying revision storage object
343
515
344 A revlog consists of two parts, an index and the revision data.
516 A revlog consists of two parts, an index and the revision data.
345
517
346 The index is a file with a fixed record size containing
518 The index is a file with a fixed record size containing
347 information on each revision, including its nodeid (hash), the
519 information on each revision, including its nodeid (hash), the
348 nodeids of its parents, the position and offset of its data within
520 nodeids of its parents, the position and offset of its data within
349 the data file, and the revision it's based on. Finally, each entry
521 the data file, and the revision it's based on. Finally, each entry
350 contains a linkrev entry that can serve as a pointer to external
522 contains a linkrev entry that can serve as a pointer to external
351 data.
523 data.
352
524
353 The revision data itself is a linear collection of data chunks.
525 The revision data itself is a linear collection of data chunks.
354 Each chunk represents a revision and is usually represented as a
526 Each chunk represents a revision and is usually represented as a
355 delta against the previous chunk. To bound lookup time, runs of
527 delta against the previous chunk. To bound lookup time, runs of
356 deltas are limited to about 2 times the length of the original
528 deltas are limited to about 2 times the length of the original
357 version data. This makes retrieval of a version proportional to
529 version data. This makes retrieval of a version proportional to
358 its size, or O(1) relative to the number of revisions.
530 its size, or O(1) relative to the number of revisions.
359
531
360 Both pieces of the revlog are written to in an append-only
532 Both pieces of the revlog are written to in an append-only
361 fashion, which means we never need to rewrite a file to insert or
533 fashion, which means we never need to rewrite a file to insert or
362 remove data, and can use some simple techniques to avoid the need
534 remove data, and can use some simple techniques to avoid the need
363 for locking while reading.
535 for locking while reading.
364
536
365 If checkambig, indexfile is opened with checkambig=True at
537 If checkambig, indexfile is opened with checkambig=True at
366 writing, to avoid file stat ambiguity.
538 writing, to avoid file stat ambiguity.
367
539
368 If mmaplargeindex is True, and an mmapindexthreshold is set, the
540 If mmaplargeindex is True, and an mmapindexthreshold is set, the
369 index will be mmapped rather than read if it is larger than the
541 index will be mmapped rather than read if it is larger than the
370 configured threshold.
542 configured threshold.
371
543
372 If censorable is True, the revlog can have censored revisions.
544 If censorable is True, the revlog can have censored revisions.
373
545
374 If `upperboundcomp` is not None, this is the expected maximal gain from
546 If `upperboundcomp` is not None, this is the expected maximal gain from
375 compression for the data content.
547 compression for the data content.
376
548
377 `concurrencychecker` is an optional function that receives 3 arguments: a
549 `concurrencychecker` is an optional function that receives 3 arguments: a
378 file handle, a filename, and an expected position. It should check whether
550 file handle, a filename, and an expected position. It should check whether
379 the current position in the file handle is valid, and log/warn/fail (by
551 the current position in the file handle is valid, and log/warn/fail (by
380 raising).
552 raising).
381
553
382 See mercurial/revlogutils/constants.py for details about the content of an
554 See mercurial/revlogutils/constants.py for details about the content of an
383 index entry.
555 index entry.
384 """
556 """
385
557
386 _flagserrorclass = error.RevlogError
558 _flagserrorclass = error.RevlogError
387
559
388 @staticmethod
560 @staticmethod
389 def is_inline_index(header_bytes):
561 def is_inline_index(header_bytes):
390 """Determine if a revlog is inline from the initial bytes of the index"""
562 """Determine if a revlog is inline from the initial bytes of the index"""
391 header = INDEX_HEADER.unpack(header_bytes)[0]
563 header = INDEX_HEADER.unpack(header_bytes)[0]
392
564
393 _format_flags = header & ~0xFFFF
565 _format_flags = header & ~0xFFFF
394 _format_version = header & 0xFFFF
566 _format_version = header & 0xFFFF
395
567
396 features = FEATURES_BY_VERSION[_format_version]
568 features = FEATURES_BY_VERSION[_format_version]
397 return features[b'inline'](_format_flags)
569 return features[b'inline'](_format_flags)
398
570
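Only the first bytes of the index are needed to answer the inline
question. A sketch with a hypothetical on-disk path (editorial, not from
the diff):

    with open('store/00changelog.i', 'rb') as fp:
        inline = revlog.is_inline_index(fp.read(4))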
399 def __init__(
571 def __init__(
400 self,
572 self,
401 opener,
573 opener,
402 target,
574 target,
403 radix,
575 radix,
404 postfix=None, # only exist for `tmpcensored` now
576 postfix=None, # only exist for `tmpcensored` now
405 checkambig=False,
577 checkambig=False,
406 mmaplargeindex=False,
578 mmaplargeindex=False,
407 censorable=False,
579 censorable=False,
408 upperboundcomp=None,
580 upperboundcomp=None,
409 persistentnodemap=False,
581 persistentnodemap=False,
410 concurrencychecker=None,
582 concurrencychecker=None,
411 trypending=False,
583 trypending=False,
412 try_split=False,
584 try_split=False,
413 canonical_parent_order=True,
585 canonical_parent_order=True,
414 ):
586 ):
415 """
587 """
416 create a revlog object
588 create a revlog object
417
589
418 opener is a function that abstracts the file opening operation
590 opener is a function that abstracts the file opening operation
419 and can be used to implement COW semantics or the like.
591 and can be used to implement COW semantics or the like.
420
592
421 `target`: a (KIND, ID) tuple that identifies the content stored in
593 `target`: a (KIND, ID) tuple that identifies the content stored in
422 this revlog. It helps the rest of the code understand what the revlog
594 this revlog. It helps the rest of the code understand what the revlog
423 is about without having to resort to heuristics and index filename
595 is about without having to resort to heuristics and index filename
424 analysis. Note that this must reliably be set by normal code, but
596 analysis. Note that this must reliably be set by normal code, but
425 test, debug, or performance measurement code might not set it to an
597 test, debug, or performance measurement code might not set it to an
426 accurate value.
598 accurate value.
427 """
599 """
428
600
429 self.radix = radix
601 self.radix = radix
430
602
431 self._docket_file = None
603 self._docket_file = None
432 self._indexfile = None
604 self._indexfile = None
433 self._datafile = None
605 self._datafile = None
434 self._sidedatafile = None
606 self._sidedatafile = None
435 self._nodemap_file = None
607 self._nodemap_file = None
436 self.postfix = postfix
608 self.postfix = postfix
437 self._trypending = trypending
609 self._trypending = trypending
438 self._try_split = try_split
610 self._try_split = try_split
439 self.opener = opener
611 self.opener = opener
440 if persistentnodemap:
612 if persistentnodemap:
441 self._nodemap_file = nodemaputil.get_nodemap_file(self)
613 self._nodemap_file = nodemaputil.get_nodemap_file(self)
442
614
443 assert target[0] in ALL_KINDS
615 assert target[0] in ALL_KINDS
444 assert len(target) == 2
616 assert len(target) == 2
445 self.target = target
617 self.target = target
446 if b'feature-config' in self.opener.options:
618 if b'feature-config' in self.opener.options:
447 self.feature_config = self.opener.options[b'feature-config'].copy()
619 self.feature_config = self.opener.options[b'feature-config'].copy()
448 else:
620 else:
449 self.feature_config = FeatureConfig()
621 self.feature_config = FeatureConfig()
450 self.feature_config.censorable = censorable
622 self.feature_config.censorable = censorable
451 self.feature_config.canonical_parent_order = canonical_parent_order
623 self.feature_config.canonical_parent_order = canonical_parent_order
452 if b'data-config' in self.opener.options:
624 if b'data-config' in self.opener.options:
453 self.data_config = self.opener.options[b'data-config'].copy()
625 self.data_config = self.opener.options[b'data-config'].copy()
454 else:
626 else:
455 self.data_config = DataConfig()
627 self.data_config = DataConfig()
456 self.data_config.check_ambig = checkambig
628 self.data_config.check_ambig = checkambig
457 self.data_config.mmap_large_index = mmaplargeindex
629 self.data_config.mmap_large_index = mmaplargeindex
458 if b'delta-config' in self.opener.options:
630 if b'delta-config' in self.opener.options:
459 self.delta_config = self.opener.options[b'delta-config'].copy()
631 self.delta_config = self.opener.options[b'delta-config'].copy()
460 else:
632 else:
461 self.delta_config = DeltaConfig()
633 self.delta_config = DeltaConfig()
462 self.delta_config.upper_bound_comp = upperboundcomp
634 self.delta_config.upper_bound_comp = upperboundcomp
463
635
464 # 3-tuple of (node, rev, text) for a raw revision.
636 # 3-tuple of (node, rev, text) for a raw revision.
465 self._revisioncache = None
637 self._revisioncache = None
466 # Maps rev to chain base rev.
638 # Maps rev to chain base rev.
467 self._chainbasecache = util.lrucachedict(100)
639 self._chainbasecache = util.lrucachedict(100)
468
640
469 self.index = None
641 self.index = None
470 self._docket = None
642 self._docket = None
471 self._nodemap_docket = None
643 self._nodemap_docket = None
472 # Mapping of partial identifiers to full nodes.
644 # Mapping of partial identifiers to full nodes.
473 self._pcache = {}
645 self._pcache = {}
474
646
475 # other optional features
647 # other optional features
476
648
477 # Make copy of flag processors so each revlog instance can support
649 # Make copy of flag processors so each revlog instance can support
478 # custom flags.
650 # custom flags.
479 self._flagprocessors = dict(flagutil.flagprocessors)
651 self._flagprocessors = dict(flagutil.flagprocessors)
480
481 # 3-tuple of file handles being used for active writing.
482 self._writinghandles = None
483 # prevent nesting of addgroup
652 # prevent nesting of addgroup
484 self._adding_group = None
653 self._adding_group = None
485
654
486 self._loadindex()
655 chunk_cache = self._loadindex()
656 self._load_inner(chunk_cache)
487
657
488 self._concurrencychecker = concurrencychecker
658 self._concurrencychecker = concurrencychecker
489
659
490 @property
660 @property
491 def _generaldelta(self):
661 def _generaldelta(self):
492 """temporary compatibility proxy"""
662 """temporary compatibility proxy"""
493 util.nouideprecwarn(
663 util.nouideprecwarn(
494 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
664 b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
495 )
665 )
496 return self.delta_config.general_delta
666 return self.delta_config.general_delta
497
667
498 @property
668 @property
499 def _checkambig(self):
669 def _checkambig(self):
500 """temporary compatibility proxy"""
670 """temporary compatibility proxy"""
501 util.nouideprecwarn(
671 util.nouideprecwarn(
502 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
672 b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
503 )
673 )
504 return self.data_config.check_ambig
674 return self.data_config.check_ambig
505
675
506 @property
676 @property
507 def _mmaplargeindex(self):
677 def _mmaplargeindex(self):
508 """temporary compatibility proxy"""
678 """temporary compatibility proxy"""
509 util.nouideprecwarn(
679 util.nouideprecwarn(
510 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
680 b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
511 )
681 )
512 return self.data_config.mmap_large_index
682 return self.data_config.mmap_large_index
513
683
514 @property
684 @property
515 def _censorable(self):
685 def _censorable(self):
516 """temporary compatibility proxy"""
686 """temporary compatibility proxy"""
517 util.nouideprecwarn(
687 util.nouideprecwarn(
518 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
688 b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
519 )
689 )
520 return self.feature_config.censorable
690 return self.feature_config.censorable
521
691
522 @property
692 @property
523 def _chunkcachesize(self):
693 def _chunkcachesize(self):
524 """temporary compatibility proxy"""
694 """temporary compatibility proxy"""
525 util.nouideprecwarn(
695 util.nouideprecwarn(
526 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
696 b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
527 )
697 )
528 return self.data_config.chunk_cache_size
698 return self.data_config.chunk_cache_size
529
699
530 @property
700 @property
531 def _maxchainlen(self):
701 def _maxchainlen(self):
532 """temporary compatibility proxy"""
702 """temporary compatibility proxy"""
533 util.nouideprecwarn(
703 util.nouideprecwarn(
534 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
704 b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
535 )
705 )
536 return self.delta_config.max_chain_len
706 return self.delta_config.max_chain_len
537
707
538 @property
708 @property
539 def _deltabothparents(self):
709 def _deltabothparents(self):
540 """temporary compatibility proxy"""
710 """temporary compatibility proxy"""
541 util.nouideprecwarn(
711 util.nouideprecwarn(
542 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
712 b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
543 )
713 )
544 return self.delta_config.delta_both_parents
714 return self.delta_config.delta_both_parents
545
715
546 @property
716 @property
547 def _candidate_group_chunk_size(self):
717 def _candidate_group_chunk_size(self):
548 """temporary compatibility proxy"""
718 """temporary compatibility proxy"""
549 util.nouideprecwarn(
719 util.nouideprecwarn(
550 b"use revlog.delta_config.candidate_group_chunk_size",
720 b"use revlog.delta_config.candidate_group_chunk_size",
551 b"6.6",
721 b"6.6",
552 stacklevel=2,
722 stacklevel=2,
553 )
723 )
554 return self.delta_config.candidate_group_chunk_size
724 return self.delta_config.candidate_group_chunk_size
555
725
556 @property
726 @property
557 def _debug_delta(self):
727 def _debug_delta(self):
558 """temporary compatibility proxy"""
728 """temporary compatibility proxy"""
559 util.nouideprecwarn(
729 util.nouideprecwarn(
560 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
730 b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
561 )
731 )
562 return self.delta_config.debug_delta
732 return self.delta_config.debug_delta
563
733
564 @property
734 @property
565 def _compengine(self):
735 def _compengine(self):
566 """temporary compatibility proxy"""
736 """temporary compatibility proxy"""
567 util.nouideprecwarn(
737 util.nouideprecwarn(
568 b"use revlog.feature_config.compression_engine",
738 b"use revlog.feature_config.compression_engine",
569 b"6.6",
739 b"6.6",
570 stacklevel=2,
740 stacklevel=2,
571 )
741 )
572 return self.feature_config.compression_engine
742 return self.feature_config.compression_engine
573
743
574 @property
744 @property
575 def upperboundcomp(self):
745 def upperboundcomp(self):
576 """temporary compatibility proxy"""
746 """temporary compatibility proxy"""
577 util.nouideprecwarn(
747 util.nouideprecwarn(
578 b"use revlog.delta_config.upper_bound_comp",
748 b"use revlog.delta_config.upper_bound_comp",
579 b"6.6",
749 b"6.6",
580 stacklevel=2,
750 stacklevel=2,
581 )
751 )
582 return self.delta_config.upper_bound_comp
752 return self.delta_config.upper_bound_comp
583
753
584 @property
754 @property
585 def _compengineopts(self):
755 def _compengineopts(self):
586 """temporary compatibility proxy"""
756 """temporary compatibility proxy"""
587 util.nouideprecwarn(
757 util.nouideprecwarn(
588 b"use revlog.feature_config.compression_engine_options",
758 b"use revlog.feature_config.compression_engine_options",
589 b"6.6",
759 b"6.6",
590 stacklevel=2,
760 stacklevel=2,
591 )
761 )
592 return self.feature_config.compression_engine_options
762 return self.feature_config.compression_engine_options
593
763
594 @property
764 @property
595 def _maxdeltachainspan(self):
765 def _maxdeltachainspan(self):
596 """temporary compatibility proxy"""
766 """temporary compatibility proxy"""
597 util.nouideprecwarn(
767 util.nouideprecwarn(
598 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
768 b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
599 )
769 )
600 return self.delta_config.max_deltachain_span
770 return self.delta_config.max_deltachain_span
601
771
602 @property
772 @property
603 def _withsparseread(self):
773 def _withsparseread(self):
604 """temporary compatibility proxy"""
774 """temporary compatibility proxy"""
605 util.nouideprecwarn(
775 util.nouideprecwarn(
606 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
776 b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
607 )
777 )
608 return self.data_config.with_sparse_read
778 return self.data_config.with_sparse_read
609
779
610 @property
780 @property
611 def _sparserevlog(self):
781 def _sparserevlog(self):
612 """temporary compatibility proxy"""
782 """temporary compatibility proxy"""
613 util.nouideprecwarn(
783 util.nouideprecwarn(
614 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
784 b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
615 )
785 )
616 return self.delta_config.sparse_revlog
786 return self.delta_config.sparse_revlog
617
787
618 @property
788 @property
619 def hassidedata(self):
789 def hassidedata(self):
620 """temporary compatibility proxy"""
790 """temporary compatibility proxy"""
621 util.nouideprecwarn(
791 util.nouideprecwarn(
622 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
792 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
623 )
793 )
624 return self.feature_config.has_side_data
794 return self.feature_config.has_side_data
625
795
626 @property
796 @property
627 def _srdensitythreshold(self):
797 def _srdensitythreshold(self):
628 """temporary compatibility proxy"""
798 """temporary compatibility proxy"""
629 util.nouideprecwarn(
799 util.nouideprecwarn(
630 b"use revlog.data_config.sr_density_threshold",
800 b"use revlog.data_config.sr_density_threshold",
631 b"6.6",
801 b"6.6",
632 stacklevel=2,
802 stacklevel=2,
633 )
803 )
634 return self.data_config.sr_density_threshold
804 return self.data_config.sr_density_threshold
635
805
636 @property
806 @property
637 def _srmingapsize(self):
807 def _srmingapsize(self):
638 """temporary compatibility proxy"""
808 """temporary compatibility proxy"""
639 util.nouideprecwarn(
809 util.nouideprecwarn(
640 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
810 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
641 )
811 )
642 return self.data_config.sr_min_gap_size
812 return self.data_config.sr_min_gap_size
643
813
644 @property
814 @property
645 def _compute_rank(self):
815 def _compute_rank(self):
646 """temporary compatibility proxy"""
816 """temporary compatibility proxy"""
647 util.nouideprecwarn(
817 util.nouideprecwarn(
648 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
818 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
649 )
819 )
650 return self.feature_config.compute_rank
820 return self.feature_config.compute_rank
651
821
652 @property
822 @property
653 def canonical_parent_order(self):
823 def canonical_parent_order(self):
654 """temporary compatibility proxy"""
824 """temporary compatibility proxy"""
655 util.nouideprecwarn(
825 util.nouideprecwarn(
656 b"use revlog.feature_config.canonical_parent_order",
826 b"use revlog.feature_config.canonical_parent_order",
657 b"6.6",
827 b"6.6",
658 stacklevel=2,
828 stacklevel=2,
659 )
829 )
660 return self.feature_config.canonical_parent_order
830 return self.feature_config.canonical_parent_order
661
831
662 @property
832 @property
663 def _lazydelta(self):
833 def _lazydelta(self):
664 """temporary compatibility proxy"""
834 """temporary compatibility proxy"""
665 util.nouideprecwarn(
835 util.nouideprecwarn(
666 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
836 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
667 )
837 )
668 return self.delta_config.lazy_delta
838 return self.delta_config.lazy_delta
669
839
670 @property
840 @property
671 def _lazydeltabase(self):
841 def _lazydeltabase(self):
672 """temporary compatibility proxy"""
842 """temporary compatibility proxy"""
673 util.nouideprecwarn(
843 util.nouideprecwarn(
674 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
844 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
675 )
845 )
676 return self.delta_config.lazy_delta_base
846 return self.delta_config.lazy_delta_base
677
847
678 def _init_opts(self):
848 def _init_opts(self):
679 """process options (from above/config) to setup associated default revlog mode
849 """process options (from above/config) to setup associated default revlog mode
680
850
681 These values might be affected when actually reading on-disk information.
851 These values might be affected when actually reading on-disk information.
682
852
683 The relevant values are returned for use in _loadindex().
853 The relevant values are returned for use in _loadindex().
684
854
685 * newversionflags:
855 * newversionflags:
686 version header to use if we need to create a new revlog
856 version header to use if we need to create a new revlog
687
857
688 * mmapindexthreshold:
858 * mmapindexthreshold:
689 minimal index size at which to start using mmap
859 minimal index size at which to start using mmap
690
860
691 * force_nodemap:
861 * force_nodemap:
692 force the usage of a "development" version of the nodemap code
862 force the usage of a "development" version of the nodemap code
693 """
863 """
694 opts = self.opener.options
864 opts = self.opener.options
695
865
696 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
866 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
697 new_header = CHANGELOGV2
867 new_header = CHANGELOGV2
698 compute_rank = opts.get(b'changelogv2.compute-rank', True)
868 compute_rank = opts.get(b'changelogv2.compute-rank', True)
699 self.feature_config.compute_rank = compute_rank
869 self.feature_config.compute_rank = compute_rank
700 elif b'revlogv2' in opts:
870 elif b'revlogv2' in opts:
701 new_header = REVLOGV2
871 new_header = REVLOGV2
702 elif b'revlogv1' in opts:
872 elif b'revlogv1' in opts:
703 new_header = REVLOGV1 | FLAG_INLINE_DATA
873 new_header = REVLOGV1 | FLAG_INLINE_DATA
704 if b'generaldelta' in opts:
874 if b'generaldelta' in opts:
705 new_header |= FLAG_GENERALDELTA
875 new_header |= FLAG_GENERALDELTA
706 elif b'revlogv0' in self.opener.options:
876 elif b'revlogv0' in self.opener.options:
707 new_header = REVLOGV0
877 new_header = REVLOGV0
708 else:
878 else:
709 new_header = REVLOG_DEFAULT_VERSION
879 new_header = REVLOG_DEFAULT_VERSION
710
880
711 mmapindexthreshold = None
881 mmapindexthreshold = None
712 if self.data_config.mmap_large_index:
882 if self.data_config.mmap_large_index:
713 mmapindexthreshold = self.data_config.mmap_index_threshold
883 mmapindexthreshold = self.data_config.mmap_index_threshold
714 if self.feature_config.enable_ellipsis:
884 if self.feature_config.enable_ellipsis:
715 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
885 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
716
886
717 # revlog v0 doesn't have flag processors
887 # revlog v0 doesn't have flag processors
718 for flag, processor in opts.get(b'flagprocessors', {}).items():
888 for flag, processor in opts.get(b'flagprocessors', {}).items():
719 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
889 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
720
890
721 chunk_cache_size = self.data_config.chunk_cache_size
891 chunk_cache_size = self.data_config.chunk_cache_size
722 if chunk_cache_size <= 0:
892 if chunk_cache_size <= 0:
723 raise error.RevlogError(
893 raise error.RevlogError(
724 _(b'revlog chunk cache size %r is not greater than 0')
894 _(b'revlog chunk cache size %r is not greater than 0')
725 % chunk_cache_size
895 % chunk_cache_size
726 )
896 )
727 elif chunk_cache_size & (chunk_cache_size - 1):
897 elif chunk_cache_size & (chunk_cache_size - 1):
728 raise error.RevlogError(
898 raise error.RevlogError(
729 _(b'revlog chunk cache size %r is not a power of 2')
899 _(b'revlog chunk cache size %r is not a power of 2')
730 % chunk_cache_size
900 % chunk_cache_size
731 )
901 )
732 force_nodemap = opts.get(b'devel-force-nodemap', False)
902 force_nodemap = opts.get(b'devel-force-nodemap', False)
733 return new_header, mmapindexthreshold, force_nodemap
903 return new_header, mmapindexthreshold, force_nodemap
734
904
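The validation above relies on the classic bit trick: a power of two has
exactly one bit set, so n & (n - 1) clears it to zero. A standalone
illustration:

    def is_power_of_two(n):
        return n > 0 and (n & (n - 1)) == 0

    assert is_power_of_two(65536)
    assert not is_power_of_two(65537)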
735 def _get_data(self, filepath, mmap_threshold, size=None):
905 def _get_data(self, filepath, mmap_threshold, size=None):
736 """return a file content with or without mmap
906 """return a file content with or without mmap
737
907
738 If the file is missing, return the empty string"""
908 If the file is missing, return the empty string"""
739 try:
909 try:
740 with self.opener(filepath) as fp:
910 with self.opener(filepath) as fp:
741 if mmap_threshold is not None:
911 if mmap_threshold is not None:
742 file_size = self.opener.fstat(fp).st_size
912 file_size = self.opener.fstat(fp).st_size
743 if file_size >= mmap_threshold:
913 if file_size >= mmap_threshold:
744 if size is not None:
914 if size is not None:
745 # avoid potential mmap crash
915 # avoid potential mmap crash
746 size = min(file_size, size)
916 size = min(file_size, size)
747 # TODO: should .close() to release resources without
917 # TODO: should .close() to release resources without
748 # relying on Python GC
918 # relying on Python GC
749 if size is None:
919 if size is None:
750 return util.buffer(util.mmapread(fp))
920 return util.buffer(util.mmapread(fp))
751 else:
921 else:
752 return util.buffer(util.mmapread(fp, size))
922 return util.buffer(util.mmapread(fp, size))
753 if size is None:
923 if size is None:
754 return fp.read()
924 return fp.read()
755 else:
925 else:
756 return fp.read(size)
926 return fp.read(size)
757 except FileNotFoundError:
927 except FileNotFoundError:
758 return b''
928 return b''
759
929
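A rough standalone equivalent of the branch above, using the stdlib
directly; util.mmapread wraps this with more care, and `threshold` here
plays the role of mmap_threshold (editorial sketch):

    import mmap
    import os

    def read_maybe_mmap(fp, threshold):
        size = os.fstat(fp.fileno()).st_size
        if threshold is not None and size >= threshold:
            # map the file read-only instead of copying it into memory
            return mmap.mmap(fp.fileno(), size, access=mmap.ACCESS_READ)
        return fp.read()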
760 def get_streams(self, max_linkrev, force_inline=False):
930 def get_streams(self, max_linkrev, force_inline=False):
761 """return a list of streams that represent this revlog
931 """return a list of streams that represent this revlog
762
932
763 This is used by stream-clone to do bytes to bytes copies of a repository.
933 This is used by stream-clone to do bytes to bytes copies of a repository.
764
934
765 This streams data for all revisions that refer to a changelog revision up
935 This streams data for all revisions that refer to a changelog revision up
766 to `max_linkrev`.
936 to `max_linkrev`.
767
937
768 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
938 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
769
939
770 It returns a list of three-tuples:
940 It returns a list of three-tuples:
771
941
772 [
942 [
773 (filename, bytes_stream, stream_size),
943 (filename, bytes_stream, stream_size),
774 …
944 …
775 ]
945 ]
776 """
946 """
777 n = len(self)
947 n = len(self)
778 index = self.index
948 index = self.index
779 while n > 0:
949 while n > 0:
780 linkrev = index[n - 1][4]
950 linkrev = index[n - 1][4]
781 if linkrev < max_linkrev:
951 if linkrev < max_linkrev:
782 break
952 break
783 # note: this loop will rarely go through multiple iterations, since
953 # note: this loop will rarely go through multiple iterations, since
784 # it only traverses commits created during the current streaming
954 # it only traverses commits created during the current streaming
785 # pull operation.
955 # pull operation.
786 #
956 #
787 # If this becomes a problem, using a binary search should cap the
957 # If this becomes a problem, using a binary search should cap the
788 # runtime of this.
958 # runtime of this.
789 n = n - 1
959 n = n - 1
790 if n == 0:
960 if n == 0:
791 # no data to send
961 # no data to send
792 return []
962 return []
793 index_size = n * index.entry_size
963 index_size = n * index.entry_size
794 data_size = self.end(n - 1)
964 data_size = self.end(n - 1)
795
965
796 # XXX we might have been split (or stripped) since the object's
966 # XXX we might have been split (or stripped) since the object's
797 # initialization. We need to close this race too, e.g. by having a
967 # initialization. We need to close this race too, e.g. by having a
798 # way to pre-open the files we feed to the revlog and never closing
968 # way to pre-open the files we feed to the revlog and never closing
799 # them before we are done streaming.
969 # them before we are done streaming.
800
970
801 if self._inline:
971 if self._inline:
802
972
803 def get_stream():
973 def get_stream():
804 with self.opener(self._indexfile, mode=b"r") as fp:
974 with self.opener(self._indexfile, mode=b"r") as fp:
805 yield None
975 yield None
806 size = index_size + data_size
976 size = index_size + data_size
807 if size <= 65536:
977 if size <= 65536:
808 yield fp.read(size)
978 yield fp.read(size)
809 else:
979 else:
810 yield from util.filechunkiter(fp, limit=size)
980 yield from util.filechunkiter(fp, limit=size)
811
981
812 inline_stream = get_stream()
982 inline_stream = get_stream()
813 next(inline_stream)
983 next(inline_stream)
814 return [
984 return [
815 (self._indexfile, inline_stream, index_size + data_size),
985 (self._indexfile, inline_stream, index_size + data_size),
816 ]
986 ]
817 elif force_inline:
987 elif force_inline:
818
988
819 def get_stream():
989 def get_stream():
820 with self.reading():
990 with self.reading():
821 yield None
991 yield None
822
992
823 for rev in range(n):
993 for rev in range(n):
824 idx = self.index.entry_binary(rev)
994 idx = self.index.entry_binary(rev)
825 if rev == 0 and self._docket is None:
995 if rev == 0 and self._docket is None:
826 # re-inject the inline flag
996 # re-inject the inline flag
827 header = self._format_flags
997 header = self._format_flags
828 header |= self._format_version
998 header |= self._format_version
829 header |= FLAG_INLINE_DATA
999 header |= FLAG_INLINE_DATA
830 header = self.index.pack_header(header)
1000 header = self.index.pack_header(header)
831 idx = header + idx
1001 idx = header + idx
832 yield idx
1002 yield idx
833 yield self._getsegmentforrevs(rev, rev)[1]
1003 yield self._getsegmentforrevs(rev, rev)[1]
834
1004
835 inline_stream = get_stream()
1005 inline_stream = get_stream()
836 next(inline_stream)
1006 next(inline_stream)
837 return [
1007 return [
838 (self._indexfile, inline_stream, index_size + data_size),
1008 (self._indexfile, inline_stream, index_size + data_size),
839 ]
1009 ]
840 else:
1010 else:
841
1011
842 def get_index_stream():
1012 def get_index_stream():
843 with self.opener(self._indexfile, mode=b"r") as fp:
1013 with self.opener(self._indexfile, mode=b"r") as fp:
844 yield None
1014 yield None
845 if index_size <= 65536:
1015 if index_size <= 65536:
846 yield fp.read(index_size)
1016 yield fp.read(index_size)
847 else:
1017 else:
848 yield from util.filechunkiter(fp, limit=index_size)
1018 yield from util.filechunkiter(fp, limit=index_size)
849
1019
850 def get_data_stream():
1020 def get_data_stream():
851 with self._datafp() as fp:
1021 with self._datafp() as fp:
852 yield None
1022 yield None
853 if data_size <= 65536:
1023 if data_size <= 65536:
854 yield fp.read(data_size)
1024 yield fp.read(data_size)
855 else:
1025 else:
856 yield from util.filechunkiter(fp, limit=data_size)
1026 yield from util.filechunkiter(fp, limit=data_size)
857
1027
858 index_stream = get_index_stream()
1028 index_stream = get_index_stream()
859 next(index_stream)
1029 next(index_stream)
860 data_stream = get_data_stream()
1030 data_stream = get_data_stream()
861 next(data_stream)
1031 next(data_stream)
862 return [
1032 return [
863 (self._datafile, data_stream, data_size),
1033 (self._datafile, data_stream, data_size),
864 (self._indexfile, index_stream, index_size),
1034 (self._indexfile, index_stream, index_size),
865 ]
1035 ]
866
1036
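A hypothetical consumer of the return value described above, copying
every advertised stream into a backup location (`rl`, `tip`, and
`backup_open` are placeholders):

    for name, stream, size in rl.get_streams(max_linkrev=tip):
        with backup_open(name) as out:
            for chunk in stream:  # the stream yields raw byte chunks
                out.write(chunk)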
867 def _loadindex(self, docket=None):
1037 def _loadindex(self, docket=None):
868
1038
869 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1039 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
870
1040
871 if self.postfix is not None:
1041 if self.postfix is not None:
872 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1042 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
873 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1043 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
874 entry_point = b'%s.i.a' % self.radix
1044 entry_point = b'%s.i.a' % self.radix
875 elif self._try_split and self.opener.exists(self._split_index_file):
1045 elif self._try_split and self.opener.exists(self._split_index_file):
876 entry_point = self._split_index_file
1046 entry_point = self._split_index_file
877 else:
1047 else:
878 entry_point = b'%s.i' % self.radix
1048 entry_point = b'%s.i' % self.radix
879
1049
880 if docket is not None:
1050 if docket is not None:
881 self._docket = docket
1051 self._docket = docket
882 self._docket_file = entry_point
1052 self._docket_file = entry_point
883 else:
1053 else:
884 self._initempty = True
1054 self._initempty = True
885 entry_data = self._get_data(entry_point, mmapindexthreshold)
1055 entry_data = self._get_data(entry_point, mmapindexthreshold)
886 if len(entry_data) > 0:
1056 if len(entry_data) > 0:
887 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1057 header = INDEX_HEADER.unpack(entry_data[:4])[0]
888 self._initempty = False
1058 self._initempty = False
889 else:
1059 else:
890 header = new_header
1060 header = new_header
891
1061
892 self._format_flags = header & ~0xFFFF
1062 self._format_flags = header & ~0xFFFF
893 self._format_version = header & 0xFFFF
1063 self._format_version = header & 0xFFFF
894
1064
895 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1065 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
896 if supported_flags is None:
1066 if supported_flags is None:
897 msg = _(b'unknown version (%d) in revlog %s')
1067 msg = _(b'unknown version (%d) in revlog %s')
898 msg %= (self._format_version, self.display_id)
1068 msg %= (self._format_version, self.display_id)
899 raise error.RevlogError(msg)
1069 raise error.RevlogError(msg)
900 elif self._format_flags & ~supported_flags:
1070 elif self._format_flags & ~supported_flags:
901 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1071 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
902 display_flag = self._format_flags >> 16
1072 display_flag = self._format_flags >> 16
903 msg %= (display_flag, self._format_version, self.display_id)
1073 msg %= (display_flag, self._format_version, self.display_id)
904 raise error.RevlogError(msg)
1074 raise error.RevlogError(msg)
905
1075
906 features = FEATURES_BY_VERSION[self._format_version]
1076 features = FEATURES_BY_VERSION[self._format_version]
907 self._inline = features[b'inline'](self._format_flags)
1077 self._inline = features[b'inline'](self._format_flags)
908 self.delta_config.general_delta = features[b'generaldelta'](
1078 self.delta_config.general_delta = features[b'generaldelta'](
909 self._format_flags
1079 self._format_flags
910 )
1080 )
911 self.feature_config.has_side_data = features[b'sidedata']
1081 self.feature_config.has_side_data = features[b'sidedata']
912
1082
913 if not features[b'docket']:
1083 if not features[b'docket']:
914 self._indexfile = entry_point
1084 self._indexfile = entry_point
915 index_data = entry_data
1085 index_data = entry_data
916 else:
1086 else:
917 self._docket_file = entry_point
1087 self._docket_file = entry_point
918 if self._initempty:
1088 if self._initempty:
919 self._docket = docketutil.default_docket(self, header)
1089 self._docket = docketutil.default_docket(self, header)
920 else:
1090 else:
921 self._docket = docketutil.parse_docket(
1091 self._docket = docketutil.parse_docket(
922 self, entry_data, use_pending=self._trypending
1092 self, entry_data, use_pending=self._trypending
923 )
1093 )
924
1094
925 if self._docket is not None:
1095 if self._docket is not None:
926 self._indexfile = self._docket.index_filepath()
1096 self._indexfile = self._docket.index_filepath()
927 index_data = b''
1097 index_data = b''
928 index_size = self._docket.index_end
1098 index_size = self._docket.index_end
929 if index_size > 0:
1099 if index_size > 0:
930 index_data = self._get_data(
1100 index_data = self._get_data(
931 self._indexfile, mmapindexthreshold, size=index_size
1101 self._indexfile, mmapindexthreshold, size=index_size
932 )
1102 )
933 if len(index_data) < index_size:
1103 if len(index_data) < index_size:
934 msg = _(b'too few index data for %s: got %d, expected %d')
1104 msg = _(b'too few index data for %s: got %d, expected %d')
935 msg %= (self.display_id, len(index_data), index_size)
1105 msg %= (self.display_id, len(index_data), index_size)
936 raise error.RevlogError(msg)
1106 raise error.RevlogError(msg)
937
1107
938 self._inline = False
1108 self._inline = False
939 # generaldelta implied by version 2 revlogs.
1109 # generaldelta implied by version 2 revlogs.
940 self.delta_config.general_delta = True
1110 self.delta_config.general_delta = True
941 # the logic for persistent nodemap will be dealt with within the
1111 # the logic for persistent nodemap will be dealt with within the
942 # main docket, so disable it for now.
1112 # main docket, so disable it for now.
943 self._nodemap_file = None
1113 self._nodemap_file = None
944
1114
945 if self._docket is not None:
1115 if self._docket is not None:
946 self._datafile = self._docket.data_filepath()
1116 self._datafile = self._docket.data_filepath()
947 self._sidedatafile = self._docket.sidedata_filepath()
1117 self._sidedatafile = self._docket.sidedata_filepath()
948 elif self.postfix is None:
1118 elif self.postfix is None:
949 self._datafile = b'%s.d' % self.radix
1119 self._datafile = b'%s.d' % self.radix
950 else:
1120 else:
951 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1121 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
952
1122
953 self.nodeconstants = sha1nodeconstants
1123 self.nodeconstants = sha1nodeconstants
954 self.nullid = self.nodeconstants.nullid
1124 self.nullid = self.nodeconstants.nullid
955
1125
956 # sparse-revlog can't be on without general-delta (issue6056)
1126 # sparse-revlog can't be on without general-delta (issue6056)
957 if not self.delta_config.general_delta:
1127 if not self.delta_config.general_delta:
958 self.delta_config.sparse_revlog = False
1128 self.delta_config.sparse_revlog = False
959
1129
960 self._storedeltachains = True
1130 self._storedeltachains = True
961
1131
962 devel_nodemap = (
1132 devel_nodemap = (
963 self._nodemap_file
1133 self._nodemap_file
964 and force_nodemap
1134 and force_nodemap
965 and parse_index_v1_nodemap is not None
1135 and parse_index_v1_nodemap is not None
966 )
1136 )
967
1137
968 use_rust_index = False
1138 use_rust_index = False
969 if rustrevlog is not None:
1139 if rustrevlog is not None:
970 if self._nodemap_file is not None:
1140 if self._nodemap_file is not None:
971 use_rust_index = True
1141 use_rust_index = True
972 else:
1142 else:
973 use_rust_index = self.opener.options.get(b'rust.index')
1143 use_rust_index = self.opener.options.get(b'rust.index')
974
1144
975 self._parse_index = parse_index_v1
1145 self._parse_index = parse_index_v1
976 if self._format_version == REVLOGV0:
1146 if self._format_version == REVLOGV0:
977 self._parse_index = revlogv0.parse_index_v0
1147 self._parse_index = revlogv0.parse_index_v0
978 elif self._format_version == REVLOGV2:
1148 elif self._format_version == REVLOGV2:
979 self._parse_index = parse_index_v2
1149 self._parse_index = parse_index_v2
980 elif self._format_version == CHANGELOGV2:
1150 elif self._format_version == CHANGELOGV2:
981 self._parse_index = parse_index_cl_v2
1151 self._parse_index = parse_index_cl_v2
982 elif devel_nodemap:
1152 elif devel_nodemap:
983 self._parse_index = parse_index_v1_nodemap
1153 self._parse_index = parse_index_v1_nodemap
984 elif use_rust_index:
1154 elif use_rust_index:
985 self._parse_index = parse_index_v1_mixed
1155 self._parse_index = parse_index_v1_mixed
986 try:
1156 try:
987 d = self._parse_index(index_data, self._inline)
1157 d = self._parse_index(index_data, self._inline)
988 index, chunkcache = d
1158 index, chunkcache = d
989 use_nodemap = (
1159 use_nodemap = (
990 not self._inline
1160 not self._inline
991 and self._nodemap_file is not None
1161 and self._nodemap_file is not None
992 and hasattr(index, 'update_nodemap_data')
1162 and hasattr(index, 'update_nodemap_data')
993 )
1163 )
994 if use_nodemap:
1164 if use_nodemap:
995 nodemap_data = nodemaputil.persisted_data(self)
1165 nodemap_data = nodemaputil.persisted_data(self)
996 if nodemap_data is not None:
1166 if nodemap_data is not None:
997 docket = nodemap_data[0]
1167 docket = nodemap_data[0]
998 if (
1168 if (
999 len(d[0]) > docket.tip_rev
1169 len(d[0]) > docket.tip_rev
1000 and d[0][docket.tip_rev][7] == docket.tip_node
1170 and d[0][docket.tip_rev][7] == docket.tip_node
1001 ):
1171 ):
1002 # no changelog tampering
1172 # no changelog tampering
1003 self._nodemap_docket = docket
1173 self._nodemap_docket = docket
1004 index.update_nodemap_data(*nodemap_data)
1174 index.update_nodemap_data(*nodemap_data)
1005 except (ValueError, IndexError):
1175 except (ValueError, IndexError):
1006 raise error.RevlogError(
1176 raise error.RevlogError(
1007 _(b"index %s is corrupted") % self.display_id
1177 _(b"index %s is corrupted") % self.display_id
1008 )
1178 )
1009 self.index = index
1179 self.index = index
1010 self._segmentfile = randomaccessfile.randomaccessfile(
1011 self.opener,
1012 (self._indexfile if self._inline else self._datafile),
1013 self.data_config.chunk_cache_size,
1014 chunkcache,
1015 )
1016 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
1017 self.opener,
1018 self._sidedatafile,
1019 self.data_config.chunk_cache_size,
1020 )
1021 # revnum -> (chain-length, sum-delta-length)
1180 # revnum -> (chain-length, sum-delta-length)
1022 self._chaininfocache = util.lrucachedict(500)
1181 self._chaininfocache = util.lrucachedict(500)
1023 # revlog header -> revlog compressor
1182 # revlog header -> revlog compressor
1024 self._decompressors = {}
1183 self._decompressors = {}
1025
1184
1185 return chunkcache
1186
1187 def _load_inner(self, chunk_cache):
1188 self._inner = _InnerRevlog(
1189 opener=self.opener,
1190 index=self.index,
1191 index_file=self._indexfile,
1192 data_file=self._datafile,
1193 sidedata_file=self._sidedatafile,
1194 inline=self._inline,
1195 data_config=self.data_config,
1196 chunk_cache=chunk_cache,
1197 )
1198
1026 def get_revlog(self):
1199 def get_revlog(self):
1027 """simple function to mirror API of other not-really-revlog API"""
1200 """simple function to mirror API of other not-really-revlog API"""
1028 return self
1201 return self
1029
1202
1030 @util.propertycache
1203 @util.propertycache
1031 def revlog_kind(self):
1204 def revlog_kind(self):
1032 return self.target[0]
1205 return self.target[0]
1033
1206
1034 @util.propertycache
1207 @util.propertycache
1035 def display_id(self):
1208 def display_id(self):
1036 """The public facing "ID" of the revlog that we use in message"""
1209 """The public facing "ID" of the revlog that we use in message"""
1037 if self.revlog_kind == KIND_FILELOG:
1210 if self.revlog_kind == KIND_FILELOG:
1038 # Reference the file without the "data/" prefix, so it is familiar
1211 # Reference the file without the "data/" prefix, so it is familiar
1039 # to the user.
1212 # to the user.
1040 return self.target[1]
1213 return self.target[1]
1041 else:
1214 else:
1042 return self.radix
1215 return self.radix
1043
1216
1044 def _get_decompressor(self, t):
1217 def _get_decompressor(self, t):
1045 try:
1218 try:
1046 compressor = self._decompressors[t]
1219 compressor = self._decompressors[t]
1047 except KeyError:
1220 except KeyError:
1048 try:
1221 try:
1049 engine = util.compengines.forrevlogheader(t)
1222 engine = util.compengines.forrevlogheader(t)
1050 compressor = engine.revlogcompressor(
1223 compressor = engine.revlogcompressor(
1051 self.feature_config.compression_engine_options
1224 self.feature_config.compression_engine_options
1052 )
1225 )
1053 self._decompressors[t] = compressor
1226 self._decompressors[t] = compressor
1054 except KeyError:
1227 except KeyError:
1055 raise error.RevlogError(
1228 raise error.RevlogError(
1056 _(b'unknown compression type %s') % binascii.hexlify(t)
1229 _(b'unknown compression type %s') % binascii.hexlify(t)
1057 )
1230 )
1058 return compressor
1231 return compressor
1059
1232
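For context on the header byte being dispatched here: in classic revlogs
an uncompressed chunk is prefixed with b'u' and zlib data starts with
b'x' (the zlib magic); other values go through
util.compengines.forrevlogheader as above. A simplified sketch, not the
actual decompression path:

    header = chunk[0:1]
    if header == b'u':
        text = chunk[1:]  # stored verbatim after the marker
    else:
        text = decompressor(chunk)  # e.g. zlib when header is b'x'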
1060 @util.propertycache
1233 @util.propertycache
1061 def _compressor(self):
1234 def _compressor(self):
1062 engine = util.compengines[self.feature_config.compression_engine]
1235 engine = util.compengines[self.feature_config.compression_engine]
1063 return engine.revlogcompressor(
1236 return engine.revlogcompressor(
1064 self.feature_config.compression_engine_options
1237 self.feature_config.compression_engine_options
1065 )
1238 )
1066
1239
1067 @util.propertycache
1240 @util.propertycache
1068 def _decompressor(self):
1241 def _decompressor(self):
1069 """the default decompressor"""
1242 """the default decompressor"""
1070 if self._docket is None:
1243 if self._docket is None:
1071 return None
1244 return None
1072 t = self._docket.default_compression_header
1245 t = self._docket.default_compression_header
1073 c = self._get_decompressor(t)
1246 c = self._get_decompressor(t)
1074 return c.decompress
1247 return c.decompress
1075
1248
1076 def __index_write_fp(self):
1077 # You should not use this directly and use `_writing` instead
1078 try:
1079 f = self.opener(
1080 self._indexfile,
1081 mode=b"r+",
1082 checkambig=self.data_config.check_ambig,
1083 )
1084 if self._docket is None:
1085 f.seek(0, os.SEEK_END)
1086 else:
1087 f.seek(self._docket.index_end, os.SEEK_SET)
1088 return f
1089 except FileNotFoundError:
1090 return self.opener(
1091 self._indexfile,
1092 mode=b"w+",
1093 checkambig=self.data_config.check_ambig,
1094 )
1095
1096 def __index_new_fp(self):
1097 # You should not use this unless you are upgrading from an inline revlog
1098 return self.opener(
1099 self._indexfile,
1100 mode=b"w",
1101 checkambig=self.data_config.check_ambig,
1102 atomictemp=True,
1103 )
1104
1105 def _datafp(self, mode=b'r'):
1249 def _datafp(self, mode=b'r'):
1106 """file object for the revlog's data file"""
1250 """file object for the revlog's data file"""
1107 return self.opener(self._datafile, mode=mode)
1251 return self.opener(self._datafile, mode=mode)
1108
1252
1109 def tiprev(self):
1253 def tiprev(self):
1110 return len(self.index) - 1
1254 return len(self.index) - 1
1111
1255
1112 def tip(self):
1256 def tip(self):
1113 return self.node(self.tiprev())
1257 return self.node(self.tiprev())
1114
1258
1115 def __contains__(self, rev):
1259 def __contains__(self, rev):
1116 return 0 <= rev < len(self)
1260 return 0 <= rev < len(self)
1117
1261
1118 def __len__(self):
1262 def __len__(self):
1119 return len(self.index)
1263 return len(self.index)
1120
1264
1121 def __iter__(self):
1265 def __iter__(self):
1122 return iter(range(len(self)))
1266 return iter(range(len(self)))
1123
1267
1124 def revs(self, start=0, stop=None):
1268 def revs(self, start=0, stop=None):
1125 """iterate over all rev in this revlog (from start to stop)"""
1269 """iterate over all rev in this revlog (from start to stop)"""
1126 return storageutil.iterrevs(len(self), start=start, stop=stop)
1270 return storageutil.iterrevs(len(self), start=start, stop=stop)
1127
1271
1128 def hasnode(self, node):
1272 def hasnode(self, node):
1129 try:
1273 try:
1130 self.rev(node)
1274 self.rev(node)
1131 return True
1275 return True
1132 except KeyError:
1276 except KeyError:
1133 return False
1277 return False
1134
1278
1135 def _candelta(self, baserev, rev):
1279 def _candelta(self, baserev, rev):
1136 """whether two revisions (baserev, rev) can be delta-ed or not"""
1280 """whether two revisions (baserev, rev) can be delta-ed or not"""
1137 # Disable delta if either rev requires a content-changing flag
1281 # Disable delta if either rev requires a content-changing flag
1138 # processor (ex. LFS). This is because such flag processor can alter
1282 # processor (ex. LFS). This is because such flag processor can alter
1139 # the rawtext content that the delta will be based on, and two clients
1283 # the rawtext content that the delta will be based on, and two clients
1140 # could have the same revlog node with different flags (i.e. different
1284 # could have the same revlog node with different flags (i.e. different
1141 # rawtext contents) and the delta could be incompatible.
1285 # rawtext contents) and the delta could be incompatible.
1142 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1286 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1143 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1287 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1144 ):
1288 ):
1145 return False
1289 return False
1146 return True
1290 return True
1147
1291
1148 def update_caches(self, transaction):
1292 def update_caches(self, transaction):
1149 """update on disk cache
1293 """update on disk cache
1150
1294
1151 If a transaction is passed, the update may be delayed to transaction
1295 If a transaction is passed, the update may be delayed to transaction
1152 commit."""
1296 commit."""
1153 if self._nodemap_file is not None:
1297 if self._nodemap_file is not None:
1154 if transaction is None:
1298 if transaction is None:
1155 nodemaputil.update_persistent_nodemap(self)
1299 nodemaputil.update_persistent_nodemap(self)
1156 else:
1300 else:
1157 nodemaputil.setup_persistent_nodemap(transaction, self)
1301 nodemaputil.setup_persistent_nodemap(transaction, self)
1158
1302
1159 def clearcaches(self):
1303 def clearcaches(self):
1160 """Clear in-memory caches"""
1304 """Clear in-memory caches"""
1161 self._revisioncache = None
1305 self._revisioncache = None
1162 self._chainbasecache.clear()
1306 self._chainbasecache.clear()
1163 self._segmentfile.clear_cache()
1307 self._inner._segmentfile.clear_cache()
1164 self._segmentfile_sidedata.clear_cache()
1308 self._inner._segmentfile_sidedata.clear_cache()
1165 self._pcache = {}
1309 self._pcache = {}
1166 self._nodemap_docket = None
1310 self._nodemap_docket = None
1167 self.index.clearcaches()
1311 self.index.clearcaches()
1168 # The python code is the one responsible for validating the docket, we
1312 # The python code is the one responsible for validating the docket, we
1169 # end up having to refresh it here.
1313 # end up having to refresh it here.
1170 use_nodemap = (
1314 use_nodemap = (
1171 not self._inline
1315 not self._inline
1172 and self._nodemap_file is not None
1316 and self._nodemap_file is not None
1173 and hasattr(self.index, 'update_nodemap_data')
1317 and hasattr(self.index, 'update_nodemap_data')
1174 )
1318 )
1175 if use_nodemap:
1319 if use_nodemap:
1176 nodemap_data = nodemaputil.persisted_data(self)
1320 nodemap_data = nodemaputil.persisted_data(self)
1177 if nodemap_data is not None:
1321 if nodemap_data is not None:
1178 self._nodemap_docket = nodemap_data[0]
1322 self._nodemap_docket = nodemap_data[0]
1179 self.index.update_nodemap_data(*nodemap_data)
1323 self.index.update_nodemap_data(*nodemap_data)
1180
1324
1181 def rev(self, node):
1325 def rev(self, node):
1182 """return the revision number associated with a <nodeid>"""
1326 """return the revision number associated with a <nodeid>"""
1183 try:
1327 try:
1184 return self.index.rev(node)
1328 return self.index.rev(node)
1185 except TypeError:
1329 except TypeError:
1186 raise
1330 raise
1187 except error.RevlogError:
1331 except error.RevlogError:
1188 # parsers.c radix tree lookup failed
1332 # parsers.c radix tree lookup failed
1189 if (
1333 if (
1190 node == self.nodeconstants.wdirid
1334 node == self.nodeconstants.wdirid
1191 or node in self.nodeconstants.wdirfilenodeids
1335 or node in self.nodeconstants.wdirfilenodeids
1192 ):
1336 ):
1193 raise error.WdirUnsupported
1337 raise error.WdirUnsupported
1194 raise error.LookupError(node, self.display_id, _(b'no node'))
1338 raise error.LookupError(node, self.display_id, _(b'no node'))
1195
1339
1196 # Accessors for index entries.
1340 # Accessors for index entries.
1197
1341
1198 # The first tuple entry is 8 bytes: the first 6 bytes are the offset,
1342 # The first tuple entry is 8 bytes: the first 6 bytes are the offset,
1199 # the last 2 bytes are the flags.
1343 # the last 2 bytes are the flags.
1200 def start(self, rev):
1344 def start(self, rev):
1201 return int(self.index[rev][0] >> 16)
1345 return int(self.index[rev][0] >> 16)
1202
1346
1203 def sidedata_cut_off(self, rev):
1347 def sidedata_cut_off(self, rev):
1204 sd_cut_off = self.index[rev][8]
1348 sd_cut_off = self.index[rev][8]
1205 if sd_cut_off != 0:
1349 if sd_cut_off != 0:
1206 return sd_cut_off
1350 return sd_cut_off
1207 # This is some annoying dance, because entries without sidedata
1351 # This is some annoying dance, because entries without sidedata
1208 # currently use 0 as their offset (instead of previous-offset +
1352 # currently use 0 as their offset (instead of previous-offset +
1209 # previous-size)
1353 # previous-size)
1210 #
1354 #
1211 # We should reconsider this "no sidedata → sidedata_offset 0" policy.
1355 # We should reconsider this "no sidedata → sidedata_offset 0" policy.
1212 # In the meantime, we need this.
1356 # In the meantime, we need this.
1213 while 0 <= rev:
1357 while 0 <= rev:
1214 e = self.index[rev]
1358 e = self.index[rev]
1215 if e[9] != 0:
1359 if e[9] != 0:
1216 return e[8] + e[9]
1360 return e[8] + e[9]
1217 rev -= 1
1361 rev -= 1
1218 return 0
1362 return 0
1219
1363
1220 def flags(self, rev):
1364 def flags(self, rev):
1221 return self.index[rev][0] & 0xFFFF
1365 return self.index[rev][0] & 0xFFFF
1222
1366
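# --- editor's note: an illustrative sketch, not part of revlog.py ---
# ``start`` and ``flags`` above unpack the same packed 8-byte index
# value: the upper 48 bits hold the data offset, the lower 16 bits
# hold the flags. A minimal round-trip of that packing (the helper
# name is ours):
def _pack_entry0(offset, flags):
    return (offset << 16) | (flags & 0xFFFF)

_e0 = _pack_entry0(4096, 0x0001)
assert _e0 >> 16 == 4096       # what start(rev) extracts
assert _e0 & 0xFFFF == 0x0001  # what flags(rev) extracts
# --- end of sketch ---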
1223 def length(self, rev):
1367 def length(self, rev):
1224 return self.index[rev][1]
1368 return self.index[rev][1]
1225
1369
1226 def sidedata_length(self, rev):
1370 def sidedata_length(self, rev):
1227 if not self.feature_config.has_side_data:
1371 if not self.feature_config.has_side_data:
1228 return 0
1372 return 0
1229 return self.index[rev][9]
1373 return self.index[rev][9]
1230
1374
1231 def rawsize(self, rev):
1375 def rawsize(self, rev):
1232 """return the length of the uncompressed text for a given revision"""
1376 """return the length of the uncompressed text for a given revision"""
1233 l = self.index[rev][2]
1377 l = self.index[rev][2]
1234 if l >= 0:
1378 if l >= 0:
1235 return l
1379 return l
1236
1380
1237 t = self.rawdata(rev)
1381 t = self.rawdata(rev)
1238 return len(t)
1382 return len(t)
1239
1383
1240 def size(self, rev):
1384 def size(self, rev):
1241 """length of non-raw text (processed by a "read" flag processor)"""
1385 """length of non-raw text (processed by a "read" flag processor)"""
1242 # fast path: if no "read" flag processor could change the content,
1386 # fast path: if no "read" flag processor could change the content,
1243 # size is rawsize. note: ELLIPSIS is known to not change the content.
1387 # size is rawsize. note: ELLIPSIS is known to not change the content.
1244 flags = self.flags(rev)
1388 flags = self.flags(rev)
1245 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1389 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1246 return self.rawsize(rev)
1390 return self.rawsize(rev)
1247
1391
1248 return len(self.revision(rev))
1392 return len(self.revision(rev))
1249
1393
1250 def fast_rank(self, rev):
1394 def fast_rank(self, rev):
1251 """Return the rank of a revision if already known, or None otherwise.
1395 """Return the rank of a revision if already known, or None otherwise.
1252
1396
1253 The rank of a revision is the size of the sub-graph it defines as a
1397 The rank of a revision is the size of the sub-graph it defines as a
1254 head. Equivalently, the rank of a revision `r` is the size of the set
1398 head. Equivalently, the rank of a revision `r` is the size of the set
1255 `ancestors(r)`, `r` included.
1399 `ancestors(r)`, `r` included.
1256
1400
1257 This method returns the rank retrieved from the revlog in constant
1401 This method returns the rank retrieved from the revlog in constant
1258 time. It makes no attempt at computing unknown values for versions of
1402 time. It makes no attempt at computing unknown values for versions of
1259 the revlog which do not persist the rank.
1403 the revlog which do not persist the rank.
1260 """
1404 """
1261 rank = self.index[rev][ENTRY_RANK]
1405 rank = self.index[rev][ENTRY_RANK]
1262 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1406 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1263 return None
1407 return None
1264 if rev == nullrev:
1408 if rev == nullrev:
1265 return 0 # convention
1409 return 0 # convention
1266 return rank
1410 return rank
1267
1411
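# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The rank described above is the size of ``ancestors(r)`` with ``r``
# itself included. A brute-force version on a plain parent map, with
# -1 standing in for nullrev (all names here are ours):
def _rank_sketch(parents, r):
    seen, stack = set(), [r]
    while stack:
        n = stack.pop()
        if n != -1 and n not in seen:
            seen.add(n)
            stack.extend(parents[n])
    return len(seen)

_parents = {0: [-1], 1: [0], 2: [0], 3: [1, 2]}
assert _rank_sketch(_parents, 3) == 4  # revs 0, 1, 2 plus 3 itself
# --- end of sketch ---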
1268 def chainbase(self, rev):
1412 def chainbase(self, rev):
1269 base = self._chainbasecache.get(rev)
1413 base = self._chainbasecache.get(rev)
1270 if base is not None:
1414 if base is not None:
1271 return base
1415 return base
1272
1416
1273 index = self.index
1417 index = self.index
1274 iterrev = rev
1418 iterrev = rev
1275 base = index[iterrev][3]
1419 base = index[iterrev][3]
1276 while base != iterrev:
1420 while base != iterrev:
1277 iterrev = base
1421 iterrev = base
1278 base = index[iterrev][3]
1422 base = index[iterrev][3]
1279
1423
1280 self._chainbasecache[rev] = base
1424 self._chainbasecache[rev] = base
1281 return base
1425 return base
1282
1426
1283 def linkrev(self, rev):
1427 def linkrev(self, rev):
1284 return self.index[rev][4]
1428 return self.index[rev][4]
1285
1429
1286 def parentrevs(self, rev):
1430 def parentrevs(self, rev):
1287 try:
1431 try:
1288 entry = self.index[rev]
1432 entry = self.index[rev]
1289 except IndexError:
1433 except IndexError:
1290 if rev == wdirrev:
1434 if rev == wdirrev:
1291 raise error.WdirUnsupported
1435 raise error.WdirUnsupported
1292 raise
1436 raise
1293
1437
1294 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1438 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
1295 return entry[6], entry[5]
1439 return entry[6], entry[5]
1296 else:
1440 else:
1297 return entry[5], entry[6]
1441 return entry[5], entry[6]
1298
1442
1299 # fast parentrevs(rev) where rev isn't filtered
1443 # fast parentrevs(rev) where rev isn't filtered
1300 _uncheckedparentrevs = parentrevs
1444 _uncheckedparentrevs = parentrevs
1301
1445
1302 def node(self, rev):
1446 def node(self, rev):
1303 try:
1447 try:
1304 return self.index[rev][7]
1448 return self.index[rev][7]
1305 except IndexError:
1449 except IndexError:
1306 if rev == wdirrev:
1450 if rev == wdirrev:
1307 raise error.WdirUnsupported
1451 raise error.WdirUnsupported
1308 raise
1452 raise
1309
1453
1310 # Derived from index values.
1454 # Derived from index values.
1311
1455
1312 def end(self, rev):
1456 def end(self, rev):
1313 return self.start(rev) + self.length(rev)
1457 return self.start(rev) + self.length(rev)
1314
1458
1315 def parents(self, node):
1459 def parents(self, node):
1316 i = self.index
1460 i = self.index
1317 d = i[self.rev(node)]
1461 d = i[self.rev(node)]
1318 # inline node() to avoid function call overhead
1462 # inline node() to avoid function call overhead
1319 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1463 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
1320 return i[d[6]][7], i[d[5]][7]
1464 return i[d[6]][7], i[d[5]][7]
1321 else:
1465 else:
1322 return i[d[5]][7], i[d[6]][7]
1466 return i[d[5]][7], i[d[6]][7]
1323
1467
1324 def chainlen(self, rev):
1468 def chainlen(self, rev):
1325 return self._chaininfo(rev)[0]
1469 return self._chaininfo(rev)[0]
1326
1470
1327 def _chaininfo(self, rev):
1471 def _chaininfo(self, rev):
1328 chaininfocache = self._chaininfocache
1472 chaininfocache = self._chaininfocache
1329 if rev in chaininfocache:
1473 if rev in chaininfocache:
1330 return chaininfocache[rev]
1474 return chaininfocache[rev]
1331 index = self.index
1475 index = self.index
1332 generaldelta = self.delta_config.general_delta
1476 generaldelta = self.delta_config.general_delta
1333 iterrev = rev
1477 iterrev = rev
1334 e = index[iterrev]
1478 e = index[iterrev]
1335 clen = 0
1479 clen = 0
1336 compresseddeltalen = 0
1480 compresseddeltalen = 0
1337 while iterrev != e[3]:
1481 while iterrev != e[3]:
1338 clen += 1
1482 clen += 1
1339 compresseddeltalen += e[1]
1483 compresseddeltalen += e[1]
1340 if generaldelta:
1484 if generaldelta:
1341 iterrev = e[3]
1485 iterrev = e[3]
1342 else:
1486 else:
1343 iterrev -= 1
1487 iterrev -= 1
1344 if iterrev in chaininfocache:
1488 if iterrev in chaininfocache:
1345 t = chaininfocache[iterrev]
1489 t = chaininfocache[iterrev]
1346 clen += t[0]
1490 clen += t[0]
1347 compresseddeltalen += t[1]
1491 compresseddeltalen += t[1]
1348 break
1492 break
1349 e = index[iterrev]
1493 e = index[iterrev]
1350 else:
1494 else:
1351 # Add text length of base since decompressing that also takes
1495 # Add text length of base since decompressing that also takes
1352 # work. For cache hits the length is already included.
1496 # work. For cache hits the length is already included.
1353 compresseddeltalen += e[1]
1497 compresseddeltalen += e[1]
1354 r = (clen, compresseddeltalen)
1498 r = (clen, compresseddeltalen)
1355 chaininfocache[rev] = r
1499 chaininfocache[rev] = r
1356 return r
1500 return r
1357
1501
1358 def _deltachain(self, rev, stoprev=None):
1502 def _deltachain(self, rev, stoprev=None):
1359 """Obtain the delta chain for a revision.
1503 """Obtain the delta chain for a revision.
1360
1504
1361 ``stoprev`` specifies a revision to stop at. If not specified, we
1505 ``stoprev`` specifies a revision to stop at. If not specified, we
1362 stop at the base of the chain.
1506 stop at the base of the chain.
1363
1507
1364 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1508 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1365 revs in ascending order and ``stopped`` is a bool indicating whether
1509 revs in ascending order and ``stopped`` is a bool indicating whether
1366 ``stoprev`` was hit.
1510 ``stoprev`` was hit.
1367 """
1511 """
1368 generaldelta = self.delta_config.general_delta
1512 generaldelta = self.delta_config.general_delta
1369 # Try C implementation.
1513 # Try C implementation.
1370 try:
1514 try:
1371 return self.index.deltachain(rev, stoprev, generaldelta)
1515 return self.index.deltachain(rev, stoprev, generaldelta)
1372 except AttributeError:
1516 except AttributeError:
1373 pass
1517 pass
1374
1518
1375 chain = []
1519 chain = []
1376
1520
1377 # Alias to prevent attribute lookup in tight loop.
1521 # Alias to prevent attribute lookup in tight loop.
1378 index = self.index
1522 index = self.index
1379
1523
1380 iterrev = rev
1524 iterrev = rev
1381 e = index[iterrev]
1525 e = index[iterrev]
1382 while iterrev != e[3] and iterrev != stoprev:
1526 while iterrev != e[3] and iterrev != stoprev:
1383 chain.append(iterrev)
1527 chain.append(iterrev)
1384 if generaldelta:
1528 if generaldelta:
1385 iterrev = e[3]
1529 iterrev = e[3]
1386 else:
1530 else:
1387 iterrev -= 1
1531 iterrev -= 1
1388 e = index[iterrev]
1532 e = index[iterrev]
1389
1533
1390 if iterrev == stoprev:
1534 if iterrev == stoprev:
1391 stopped = True
1535 stopped = True
1392 else:
1536 else:
1393 chain.append(iterrev)
1537 chain.append(iterrev)
1394 stopped = False
1538 stopped = False
1395
1539
1396 chain.reverse()
1540 chain.reverse()
1397 return chain, stopped
1541 return chain, stopped
1398
1542
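# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The chain walk above, reduced to a plain base map and ignoring
# ``stoprev``: with general delta each revision names its delta
# parent, without it the delta parent is implicitly rev - 1.
def _deltachain_sketch(bases, rev, generaldelta):
    chain, iterrev = [], rev
    while iterrev != bases[iterrev]:
        chain.append(iterrev)
        iterrev = bases[iterrev] if generaldelta else iterrev - 1
    chain.append(iterrev)  # the full-snapshot base ends the chain
    chain.reverse()
    return chain

# rev 3 deltas against 1, which deltas against the snapshot at rev 0
assert _deltachain_sketch({0: 0, 1: 0, 2: 1, 3: 1}, 3, True) == [0, 1, 3]
assert _deltachain_sketch({0: 0, 1: 0, 2: 0, 3: 0}, 2, False) == [0, 1, 2]
# --- end of sketch ---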
1399 def ancestors(self, revs, stoprev=0, inclusive=False):
1543 def ancestors(self, revs, stoprev=0, inclusive=False):
1400 """Generate the ancestors of 'revs' in reverse revision order.
1544 """Generate the ancestors of 'revs' in reverse revision order.
1401 Does not generate revs lower than stoprev.
1545 Does not generate revs lower than stoprev.
1402
1546
1403 See the documentation for ancestor.lazyancestors for more details."""
1547 See the documentation for ancestor.lazyancestors for more details."""
1404
1548
1405 # first, make sure start revisions aren't filtered
1549 # first, make sure start revisions aren't filtered
1406 revs = list(revs)
1550 revs = list(revs)
1407 checkrev = self.node
1551 checkrev = self.node
1408 for r in revs:
1552 for r in revs:
1409 checkrev(r)
1553 checkrev(r)
1410 # and their ancestors are then guaranteed not to be filtered either
1554 # and their ancestors are then guaranteed not to be filtered either
1411
1555
1412 if rustancestor is not None and self.index.rust_ext_compat:
1556 if rustancestor is not None and self.index.rust_ext_compat:
1413 lazyancestors = rustancestor.LazyAncestors
1557 lazyancestors = rustancestor.LazyAncestors
1414 arg = self.index
1558 arg = self.index
1415 else:
1559 else:
1416 lazyancestors = ancestor.lazyancestors
1560 lazyancestors = ancestor.lazyancestors
1417 arg = self._uncheckedparentrevs
1561 arg = self._uncheckedparentrevs
1418 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1562 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1419
1563
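# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The contract documented above, on a plain parent map: ancestors come
# out in decreasing revision order, optionally including the starting
# revs, never going below ``stoprev`` (all names here are ours).
import heapq

def _ancestors_sketch(parents, revs, stoprev=0, inclusive=False):
    if inclusive:
        heap = [-r for r in revs]
    else:
        heap = [-p for r in revs for p in parents[r]]
    heapq.heapify(heap)
    seen = set()
    while heap:
        r = -heapq.heappop(heap)
        if r in seen or r < stoprev:
            continue
        seen.add(r)
        yield r
        for p in parents[r]:
            heapq.heappush(heap, -p)

_parents = {0: [], 1: [0], 2: [0], 3: [1, 2]}
assert list(_ancestors_sketch(_parents, [3])) == [2, 1, 0]
assert list(_ancestors_sketch(_parents, [3], inclusive=True)) == [3, 2, 1, 0]
# --- end of sketch ---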
1420 def descendants(self, revs):
1564 def descendants(self, revs):
1421 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1565 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1422
1566
1423 def findcommonmissing(self, common=None, heads=None):
1567 def findcommonmissing(self, common=None, heads=None):
1424 """Return a tuple of the ancestors of common and the ancestors of heads
1568 """Return a tuple of the ancestors of common and the ancestors of heads
1425 that are not ancestors of common. In revset terminology, we return the
1569 that are not ancestors of common. In revset terminology, we return the
1426 tuple:
1570 tuple:
1427
1571
1428 ::common, (::heads) - (::common)
1572 ::common, (::heads) - (::common)
1429
1573
1430 The list is sorted by revision number, meaning it is
1574 The list is sorted by revision number, meaning it is
1431 topologically sorted.
1575 topologically sorted.
1432
1576
1433 'heads' and 'common' are both lists of node IDs. If heads is
1577 'heads' and 'common' are both lists of node IDs. If heads is
1434 not supplied, uses all of the revlog's heads. If common is not
1578 not supplied, uses all of the revlog's heads. If common is not
1435 supplied, uses nullid."""
1579 supplied, uses nullid."""
1436 if common is None:
1580 if common is None:
1437 common = [self.nullid]
1581 common = [self.nullid]
1438 if heads is None:
1582 if heads is None:
1439 heads = self.heads()
1583 heads = self.heads()
1440
1584
1441 common = [self.rev(n) for n in common]
1585 common = [self.rev(n) for n in common]
1442 heads = [self.rev(n) for n in heads]
1586 heads = [self.rev(n) for n in heads]
1443
1587
1444 # we want the ancestors, but inclusive
1588 # we want the ancestors, but inclusive
1445 class lazyset:
1589 class lazyset:
1446 def __init__(self, lazyvalues):
1590 def __init__(self, lazyvalues):
1447 self.addedvalues = set()
1591 self.addedvalues = set()
1448 self.lazyvalues = lazyvalues
1592 self.lazyvalues = lazyvalues
1449
1593
1450 def __contains__(self, value):
1594 def __contains__(self, value):
1451 return value in self.addedvalues or value in self.lazyvalues
1595 return value in self.addedvalues or value in self.lazyvalues
1452
1596
1453 def __iter__(self):
1597 def __iter__(self):
1454 added = self.addedvalues
1598 added = self.addedvalues
1455 for r in added:
1599 for r in added:
1456 yield r
1600 yield r
1457 for r in self.lazyvalues:
1601 for r in self.lazyvalues:
1458 if r not in added:
1602 if r not in added:
1459 yield r
1603 yield r
1460
1604
1461 def add(self, value):
1605 def add(self, value):
1462 self.addedvalues.add(value)
1606 self.addedvalues.add(value)
1463
1607
1464 def update(self, values):
1608 def update(self, values):
1465 self.addedvalues.update(values)
1609 self.addedvalues.update(values)
1466
1610
1467 has = lazyset(self.ancestors(common))
1611 has = lazyset(self.ancestors(common))
1468 has.add(nullrev)
1612 has.add(nullrev)
1469 has.update(common)
1613 has.update(common)
1470
1614
1471 # take all ancestors from heads that aren't in has
1615 # take all ancestors from heads that aren't in has
1472 missing = set()
1616 missing = set()
1473 visit = collections.deque(r for r in heads if r not in has)
1617 visit = collections.deque(r for r in heads if r not in has)
1474 while visit:
1618 while visit:
1475 r = visit.popleft()
1619 r = visit.popleft()
1476 if r in missing:
1620 if r in missing:
1477 continue
1621 continue
1478 else:
1622 else:
1479 missing.add(r)
1623 missing.add(r)
1480 for p in self.parentrevs(r):
1624 for p in self.parentrevs(r):
1481 if p not in has:
1625 if p not in has:
1482 visit.append(p)
1626 visit.append(p)
1483 missing = list(missing)
1627 missing = list(missing)
1484 missing.sort()
1628 missing.sort()
1485 return has, [self.node(miss) for miss in missing]
1629 return has, [self.node(miss) for miss in missing]
1486
1630
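# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The ``lazyset`` pattern above in isolation: membership checks an
# eager overlay first and only then the (potentially expensive) lazy
# source, so values can be added without materializing that source.
class _lazyset_sketch:
    def __init__(self, lazyvalues):
        self.addedvalues = set()
        self.lazyvalues = lazyvalues

    def __contains__(self, value):
        return value in self.addedvalues or value in self.lazyvalues

_s = _lazyset_sketch(frozenset(range(100)))  # stand-in lazy source
_s.addedvalues.add(-1)
assert -1 in _s and 42 in _s and 100 not in _s
# --- end of sketch ---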
1487 def incrementalmissingrevs(self, common=None):
1631 def incrementalmissingrevs(self, common=None):
1488 """Return an object that can be used to incrementally compute the
1632 """Return an object that can be used to incrementally compute the
1489 revision numbers of the ancestors of arbitrary sets that are not
1633 revision numbers of the ancestors of arbitrary sets that are not
1490 ancestors of common. This is an ancestor.incrementalmissingancestors
1634 ancestors of common. This is an ancestor.incrementalmissingancestors
1491 object.
1635 object.
1492
1636
1493 'common' is a list of revision numbers. If common is not supplied, uses
1637 'common' is a list of revision numbers. If common is not supplied, uses
1494 nullrev.
1638 nullrev.
1495 """
1639 """
1496 if common is None:
1640 if common is None:
1497 common = [nullrev]
1641 common = [nullrev]
1498
1642
1499 if rustancestor is not None and self.index.rust_ext_compat:
1643 if rustancestor is not None and self.index.rust_ext_compat:
1500 return rustancestor.MissingAncestors(self.index, common)
1644 return rustancestor.MissingAncestors(self.index, common)
1501 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1645 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1502
1646
1503 def findmissingrevs(self, common=None, heads=None):
1647 def findmissingrevs(self, common=None, heads=None):
1504 """Return the revision numbers of the ancestors of heads that
1648 """Return the revision numbers of the ancestors of heads that
1505 are not ancestors of common.
1649 are not ancestors of common.
1506
1650
1507 More specifically, return a list of revision numbers corresponding to
1651 More specifically, return a list of revision numbers corresponding to
1508 nodes N such that every N satisfies the following constraints:
1652 nodes N such that every N satisfies the following constraints:
1509
1653
1510 1. N is an ancestor of some node in 'heads'
1654 1. N is an ancestor of some node in 'heads'
1511 2. N is not an ancestor of any node in 'common'
1655 2. N is not an ancestor of any node in 'common'
1512
1656
1513 The list is sorted by revision number, meaning it is
1657 The list is sorted by revision number, meaning it is
1514 topologically sorted.
1658 topologically sorted.
1515
1659
1516 'heads' and 'common' are both lists of revision numbers. If heads is
1660 'heads' and 'common' are both lists of revision numbers. If heads is
1517 not supplied, uses all of the revlog's heads. If common is not
1661 not supplied, uses all of the revlog's heads. If common is not
1518 supplied, uses nullid."""
1662 supplied, uses nullid."""
1519 if common is None:
1663 if common is None:
1520 common = [nullrev]
1664 common = [nullrev]
1521 if heads is None:
1665 if heads is None:
1522 heads = self.headrevs()
1666 heads = self.headrevs()
1523
1667
1524 inc = self.incrementalmissingrevs(common=common)
1668 inc = self.incrementalmissingrevs(common=common)
1525 return inc.missingancestors(heads)
1669 return inc.missingancestors(heads)
1526
1670
1527 def findmissing(self, common=None, heads=None):
1671 def findmissing(self, common=None, heads=None):
1528 """Return the ancestors of heads that are not ancestors of common.
1672 """Return the ancestors of heads that are not ancestors of common.
1529
1673
1530 More specifically, return a list of nodes N such that every N
1674 More specifically, return a list of nodes N such that every N
1531 satisfies the following constraints:
1675 satisfies the following constraints:
1532
1676
1533 1. N is an ancestor of some node in 'heads'
1677 1. N is an ancestor of some node in 'heads'
1534 2. N is not an ancestor of any node in 'common'
1678 2. N is not an ancestor of any node in 'common'
1535
1679
1536 The list is sorted by revision number, meaning it is
1680 The list is sorted by revision number, meaning it is
1537 topologically sorted.
1681 topologically sorted.
1538
1682
1539 'heads' and 'common' are both lists of node IDs. If heads is
1683 'heads' and 'common' are both lists of node IDs. If heads is
1540 not supplied, uses all of the revlog's heads. If common is not
1684 not supplied, uses all of the revlog's heads. If common is not
1541 supplied, uses nullid."""
1685 supplied, uses nullid."""
1542 if common is None:
1686 if common is None:
1543 common = [self.nullid]
1687 common = [self.nullid]
1544 if heads is None:
1688 if heads is None:
1545 heads = self.heads()
1689 heads = self.heads()
1546
1690
1547 common = [self.rev(n) for n in common]
1691 common = [self.rev(n) for n in common]
1548 heads = [self.rev(n) for n in heads]
1692 heads = [self.rev(n) for n in heads]
1549
1693
1550 inc = self.incrementalmissingrevs(common=common)
1694 inc = self.incrementalmissingrevs(common=common)
1551 return [self.node(r) for r in inc.missingancestors(heads)]
1695 return [self.node(r) for r in inc.missingancestors(heads)]
1552
1696
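# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The missing-ancestors contract shared by the two methods above, on
# a plain parent map: ancestors of ``heads`` minus ancestors of
# ``common``, sorted by revision (all names here are ours).
def _missing_sketch(parents, common, heads):
    def _anc(revs):
        out, stack = set(), list(revs)
        while stack:
            r = stack.pop()
            if r not in out:
                out.add(r)
                stack.extend(parents[r])
        return out
    return sorted(_anc(heads) - _anc(common))

_parents = {0: [], 1: [0], 2: [1], 3: [1]}
assert _missing_sketch(_parents, common=[2], heads=[3]) == [3]
# --- end of sketch ---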
1553 def nodesbetween(self, roots=None, heads=None):
1697 def nodesbetween(self, roots=None, heads=None):
1554 """Return a topological path from 'roots' to 'heads'.
1698 """Return a topological path from 'roots' to 'heads'.
1555
1699
1556 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1700 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1557 topologically sorted list of all nodes N that satisfy both of
1701 topologically sorted list of all nodes N that satisfy both of
1558 these constraints:
1702 these constraints:
1559
1703
1560 1. N is a descendant of some node in 'roots'
1704 1. N is a descendant of some node in 'roots'
1561 2. N is an ancestor of some node in 'heads'
1705 2. N is an ancestor of some node in 'heads'
1562
1706
1563 Every node is considered to be both a descendant and an ancestor
1707 Every node is considered to be both a descendant and an ancestor
1564 of itself, so every reachable node in 'roots' and 'heads' will be
1708 of itself, so every reachable node in 'roots' and 'heads' will be
1565 included in 'nodes'.
1709 included in 'nodes'.
1566
1710
1567 'outroots' is the list of reachable nodes in 'roots', i.e., the
1711 'outroots' is the list of reachable nodes in 'roots', i.e., the
1568 subset of 'roots' that is returned in 'nodes'. Likewise,
1712 subset of 'roots' that is returned in 'nodes'. Likewise,
1569 'outheads' is the subset of 'heads' that is also in 'nodes'.
1713 'outheads' is the subset of 'heads' that is also in 'nodes'.
1570
1714
1571 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1715 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1572 unspecified, uses nullid as the only root. If 'heads' is
1716 unspecified, uses nullid as the only root. If 'heads' is
1573 unspecified, uses list of all of the revlog's heads."""
1717 unspecified, uses list of all of the revlog's heads."""
1574 nonodes = ([], [], [])
1718 nonodes = ([], [], [])
1575 if roots is not None:
1719 if roots is not None:
1576 roots = list(roots)
1720 roots = list(roots)
1577 if not roots:
1721 if not roots:
1578 return nonodes
1722 return nonodes
1579 lowestrev = min([self.rev(n) for n in roots])
1723 lowestrev = min([self.rev(n) for n in roots])
1580 else:
1724 else:
1581 roots = [self.nullid] # Everybody's a descendant of nullid
1725 roots = [self.nullid] # Everybody's a descendant of nullid
1582 lowestrev = nullrev
1726 lowestrev = nullrev
1583 if (lowestrev == nullrev) and (heads is None):
1727 if (lowestrev == nullrev) and (heads is None):
1584 # We want _all_ the nodes!
1728 # We want _all_ the nodes!
1585 return (
1729 return (
1586 [self.node(r) for r in self],
1730 [self.node(r) for r in self],
1587 [self.nullid],
1731 [self.nullid],
1588 list(self.heads()),
1732 list(self.heads()),
1589 )
1733 )
1590 if heads is None:
1734 if heads is None:
1591 # All nodes are ancestors, so the latest ancestor is the last
1735 # All nodes are ancestors, so the latest ancestor is the last
1592 # node.
1736 # node.
1593 highestrev = len(self) - 1
1737 highestrev = len(self) - 1
1594 # Set ancestors to None to signal that every node is an ancestor.
1738 # Set ancestors to None to signal that every node is an ancestor.
1595 ancestors = None
1739 ancestors = None
1596 # Set heads to an empty dictionary for later discovery of heads
1740 # Set heads to an empty dictionary for later discovery of heads
1597 heads = {}
1741 heads = {}
1598 else:
1742 else:
1599 heads = list(heads)
1743 heads = list(heads)
1600 if not heads:
1744 if not heads:
1601 return nonodes
1745 return nonodes
1602 ancestors = set()
1746 ancestors = set()
1603 # Turn heads into a dictionary so we can remove 'fake' heads.
1747 # Turn heads into a dictionary so we can remove 'fake' heads.
1604 # Also, later we will be using it to filter out the heads we can't
1748 # Also, later we will be using it to filter out the heads we can't
1605 # find from roots.
1749 # find from roots.
1606 heads = dict.fromkeys(heads, False)
1750 heads = dict.fromkeys(heads, False)
1607 # Start at the top and keep marking parents until we're done.
1751 # Start at the top and keep marking parents until we're done.
1608 nodestotag = set(heads)
1752 nodestotag = set(heads)
1609 # Remember where the top was so we can use it as a limit later.
1753 # Remember where the top was so we can use it as a limit later.
1610 highestrev = max([self.rev(n) for n in nodestotag])
1754 highestrev = max([self.rev(n) for n in nodestotag])
1611 while nodestotag:
1755 while nodestotag:
1612 # grab a node to tag
1756 # grab a node to tag
1613 n = nodestotag.pop()
1757 n = nodestotag.pop()
1614 # Never tag nullid
1758 # Never tag nullid
1615 if n == self.nullid:
1759 if n == self.nullid:
1616 continue
1760 continue
1617 # A node's revision number represents its place in a
1761 # A node's revision number represents its place in a
1618 # topologically sorted list of nodes.
1762 # topologically sorted list of nodes.
1619 r = self.rev(n)
1763 r = self.rev(n)
1620 if r >= lowestrev:
1764 if r >= lowestrev:
1621 if n not in ancestors:
1765 if n not in ancestors:
1622 # If we are possibly a descendant of one of the roots
1766 # If we are possibly a descendant of one of the roots
1623 # and we haven't already been marked as an ancestor
1767 # and we haven't already been marked as an ancestor
1624 ancestors.add(n) # Mark as ancestor
1768 ancestors.add(n) # Mark as ancestor
1625 # Add non-nullid parents to list of nodes to tag.
1769 # Add non-nullid parents to list of nodes to tag.
1626 nodestotag.update(
1770 nodestotag.update(
1627 [p for p in self.parents(n) if p != self.nullid]
1771 [p for p in self.parents(n) if p != self.nullid]
1628 )
1772 )
1629 elif n in heads: # We've seen it before, is it a fake head?
1773 elif n in heads: # We've seen it before, is it a fake head?
1630 # So it is, real heads should not be the ancestors of
1774 # So it is, real heads should not be the ancestors of
1631 # any other heads.
1775 # any other heads.
1632 heads.pop(n)
1776 heads.pop(n)
1633 if not ancestors:
1777 if not ancestors:
1634 return nonodes
1778 return nonodes
1635 # Now that we have our set of ancestors, we want to remove any
1779 # Now that we have our set of ancestors, we want to remove any
1636 # roots that are not ancestors.
1780 # roots that are not ancestors.
1637
1781
1638 # If one of the roots was nullid, everything is included anyway.
1782 # If one of the roots was nullid, everything is included anyway.
1639 if lowestrev > nullrev:
1783 if lowestrev > nullrev:
1640 # But, since it wasn't, let's recompute the lowest rev to not
1784 # But, since it wasn't, let's recompute the lowest rev to not
1641 # include roots that aren't ancestors.
1785 # include roots that aren't ancestors.
1642
1786
1643 # Filter out roots that aren't ancestors of heads
1787 # Filter out roots that aren't ancestors of heads
1644 roots = [root for root in roots if root in ancestors]
1788 roots = [root for root in roots if root in ancestors]
1645 # Recompute the lowest revision
1789 # Recompute the lowest revision
1646 if roots:
1790 if roots:
1647 lowestrev = min([self.rev(root) for root in roots])
1791 lowestrev = min([self.rev(root) for root in roots])
1648 else:
1792 else:
1649 # No more roots? Return empty list
1793 # No more roots? Return empty list
1650 return nonodes
1794 return nonodes
1651 else:
1795 else:
1652 # We are descending from nullid, and don't need to care about
1796 # We are descending from nullid, and don't need to care about
1653 # any other roots.
1797 # any other roots.
1654 lowestrev = nullrev
1798 lowestrev = nullrev
1655 roots = [self.nullid]
1799 roots = [self.nullid]
1656 # Transform our roots list into a set.
1800 # Transform our roots list into a set.
1657 descendants = set(roots)
1801 descendants = set(roots)
1658 # Also, keep the original roots so we can filter out roots that aren't
1802 # Also, keep the original roots so we can filter out roots that aren't
1659 # 'real' roots (i.e. are descended from other roots).
1803 # 'real' roots (i.e. are descended from other roots).
1660 roots = descendants.copy()
1804 roots = descendants.copy()
1661 # Our topologically sorted list of output nodes.
1805 # Our topologically sorted list of output nodes.
1662 orderedout = []
1806 orderedout = []
1663 # Don't start at nullid since we don't want nullid in our output list,
1807 # Don't start at nullid since we don't want nullid in our output list,
1664 # and if nullid shows up in descendants, empty parents will look like
1808 # and if nullid shows up in descendants, empty parents will look like
1665 # they're descendants.
1809 # they're descendants.
1666 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1810 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1667 n = self.node(r)
1811 n = self.node(r)
1668 isdescendant = False
1812 isdescendant = False
1669 if lowestrev == nullrev: # Everybody is a descendant of nullid
1813 if lowestrev == nullrev: # Everybody is a descendant of nullid
1670 isdescendant = True
1814 isdescendant = True
1671 elif n in descendants:
1815 elif n in descendants:
1672 # n is already a descendant
1816 # n is already a descendant
1673 isdescendant = True
1817 isdescendant = True
1674 # This check only needs to be done here because all the roots
1818 # This check only needs to be done here because all the roots
1675 # will start being marked as descendants before the loop.
1819 # will start being marked as descendants before the loop.
1676 if n in roots:
1820 if n in roots:
1677 # If n was a root, check if it's a 'real' root.
1821 # If n was a root, check if it's a 'real' root.
1678 p = tuple(self.parents(n))
1822 p = tuple(self.parents(n))
1679 # If any of its parents are descendants, it's not a root.
1823 # If any of its parents are descendants, it's not a root.
1680 if (p[0] in descendants) or (p[1] in descendants):
1824 if (p[0] in descendants) or (p[1] in descendants):
1681 roots.remove(n)
1825 roots.remove(n)
1682 else:
1826 else:
1683 p = tuple(self.parents(n))
1827 p = tuple(self.parents(n))
1684 # A node is a descendant if either of its parents are
1828 # A node is a descendant if either of its parents are
1685 # descendants. (We seeded the descendants set with the roots
1829 # descendants. (We seeded the descendants set with the roots
1686 # up there, remember?)
1830 # up there, remember?)
1687 if (p[0] in descendants) or (p[1] in descendants):
1831 if (p[0] in descendants) or (p[1] in descendants):
1688 descendants.add(n)
1832 descendants.add(n)
1689 isdescendant = True
1833 isdescendant = True
1690 if isdescendant and ((ancestors is None) or (n in ancestors)):
1834 if isdescendant and ((ancestors is None) or (n in ancestors)):
1691 # Only include nodes that are both descendants and ancestors.
1835 # Only include nodes that are both descendants and ancestors.
1692 orderedout.append(n)
1836 orderedout.append(n)
1693 if (ancestors is not None) and (n in heads):
1837 if (ancestors is not None) and (n in heads):
1694 # We're trying to figure out which heads are reachable
1838 # We're trying to figure out which heads are reachable
1695 # from roots.
1839 # from roots.
1696 # Mark this head as having been reached
1840 # Mark this head as having been reached
1697 heads[n] = True
1841 heads[n] = True
1698 elif ancestors is None:
1842 elif ancestors is None:
1699 # Otherwise, we're trying to discover the heads.
1843 # Otherwise, we're trying to discover the heads.
1700 # Assume this is a head because if it isn't, the next step
1844 # Assume this is a head because if it isn't, the next step
1701 # will eventually remove it.
1845 # will eventually remove it.
1702 heads[n] = True
1846 heads[n] = True
1703 # But, obviously its parents aren't.
1847 # But, obviously its parents aren't.
1704 for p in self.parents(n):
1848 for p in self.parents(n):
1705 heads.pop(p, None)
1849 heads.pop(p, None)
1706 heads = [head for head, flag in heads.items() if flag]
1850 heads = [head for head, flag in heads.items() if flag]
1707 roots = list(roots)
1851 roots = list(roots)
1708 assert orderedout
1852 assert orderedout
1709 assert roots
1853 assert roots
1710 assert heads
1854 assert heads
1711 return (orderedout, roots, heads)
1855 return (orderedout, roots, heads)
1712
1856
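# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The core of the contract above, on revision numbers instead of
# nodes and with none of the head/root bookkeeping: everything that
# is both a descendant of ``roots`` and an ancestor of ``heads``.
def _between_sketch(parents, roots, heads):
    def _anc(revs):
        out, stack = set(), list(revs)
        while stack:
            r = stack.pop()
            if r not in out:
                out.add(r)
                stack.extend(parents[r])
        return out
    rootset = set(roots)
    return sorted(r for r in _anc(heads) if _anc([r]) & rootset)

_parents = {0: [], 1: [0], 2: [0], 3: [1]}
# between root 1 and head 3: revs 1 and 3, but neither 0 nor 2
assert _between_sketch(_parents, roots=[1], heads=[3]) == [1, 3]
# --- end of sketch ---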
1713 def headrevs(self, revs=None):
1857 def headrevs(self, revs=None):
1714 if revs is None:
1858 if revs is None:
1715 try:
1859 try:
1716 return self.index.headrevs()
1860 return self.index.headrevs()
1717 except AttributeError:
1861 except AttributeError:
1718 return self._headrevs()
1862 return self._headrevs()
1719 if rustdagop is not None and self.index.rust_ext_compat:
1863 if rustdagop is not None and self.index.rust_ext_compat:
1720 return rustdagop.headrevs(self.index, revs)
1864 return rustdagop.headrevs(self.index, revs)
1721 return dagop.headrevs(revs, self._uncheckedparentrevs)
1865 return dagop.headrevs(revs, self._uncheckedparentrevs)
1722
1866
1723 def computephases(self, roots):
1867 def computephases(self, roots):
1724 return self.index.computephasesmapsets(roots)
1868 return self.index.computephasesmapsets(roots)
1725
1869
1726 def _headrevs(self):
1870 def _headrevs(self):
1727 count = len(self)
1871 count = len(self)
1728 if not count:
1872 if not count:
1729 return [nullrev]
1873 return [nullrev]
1730 # we won't iterate over filtered revs, so no rev is a head to start with
1874 # we won't iterate over filtered revs, so no rev is a head to start with
1731 ishead = [0] * (count + 1)
1875 ishead = [0] * (count + 1)
1732 index = self.index
1876 index = self.index
1733 for r in self:
1877 for r in self:
1734 ishead[r] = 1 # I may be a head
1878 ishead[r] = 1 # I may be a head
1735 e = index[r]
1879 e = index[r]
1736 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1880 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1737 return [r for r, val in enumerate(ishead) if val]
1881 return [r for r, val in enumerate(ishead) if val]
1738
1882
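# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The head-finding trick above in isolation: assume every revision is
# a head, then clear the mark of anything that is somebody's parent.
def _headrevs_sketch(parentlist):
    ishead = [1] * len(parentlist)
    for ps in parentlist:
        for p in ps:
            ishead[p] = 0
    return [r for r, v in enumerate(ishead) if v]

# rev 3 merges 1 and 2; rev 4 is an independent branch off rev 0
assert _headrevs_sketch([[], [0], [0], [1, 2], [0]]) == [3, 4]
# --- end of sketch ---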
1739 def heads(self, start=None, stop=None):
1883 def heads(self, start=None, stop=None):
1740 """return the list of all nodes that have no children
1884 """return the list of all nodes that have no children
1741
1885
1742 if start is specified, only heads that are descendants of
1886 if start is specified, only heads that are descendants of
1743 start will be returned
1887 start will be returned
1744 if stop is specified, it will consider all the revs from stop
1888 if stop is specified, it will consider all the revs from stop
1745 as if they had no children
1889 as if they had no children
1746 """
1890 """
1747 if start is None and stop is None:
1891 if start is None and stop is None:
1748 if not len(self):
1892 if not len(self):
1749 return [self.nullid]
1893 return [self.nullid]
1750 return [self.node(r) for r in self.headrevs()]
1894 return [self.node(r) for r in self.headrevs()]
1751
1895
1752 if start is None:
1896 if start is None:
1753 start = nullrev
1897 start = nullrev
1754 else:
1898 else:
1755 start = self.rev(start)
1899 start = self.rev(start)
1756
1900
1757 stoprevs = {self.rev(n) for n in stop or []}
1901 stoprevs = {self.rev(n) for n in stop or []}
1758
1902
1759 revs = dagop.headrevssubset(
1903 revs = dagop.headrevssubset(
1760 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1904 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1761 )
1905 )
1762
1906
1763 return [self.node(rev) for rev in revs]
1907 return [self.node(rev) for rev in revs]
1764
1908
1765 def children(self, node):
1909 def children(self, node):
1766 """find the children of a given node"""
1910 """find the children of a given node"""
1767 c = []
1911 c = []
1768 p = self.rev(node)
1912 p = self.rev(node)
1769 for r in self.revs(start=p + 1):
1913 for r in self.revs(start=p + 1):
1770 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1914 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1771 if prevs:
1915 if prevs:
1772 for pr in prevs:
1916 for pr in prevs:
1773 if pr == p:
1917 if pr == p:
1774 c.append(self.node(r))
1918 c.append(self.node(r))
1775 elif p == nullrev:
1919 elif p == nullrev:
1776 c.append(self.node(r))
1920 c.append(self.node(r))
1777 return c
1921 return c
1778
1922
1779 def commonancestorsheads(self, a, b):
1923 def commonancestorsheads(self, a, b):
1780 """calculate all the heads of the common ancestors of nodes a and b"""
1924 """calculate all the heads of the common ancestors of nodes a and b"""
1781 a, b = self.rev(a), self.rev(b)
1925 a, b = self.rev(a), self.rev(b)
1782 ancs = self._commonancestorsheads(a, b)
1926 ancs = self._commonancestorsheads(a, b)
1783 return pycompat.maplist(self.node, ancs)
1927 return pycompat.maplist(self.node, ancs)
1784
1928
1785 def _commonancestorsheads(self, *revs):
1929 def _commonancestorsheads(self, *revs):
1786 """calculate all the heads of the common ancestors of revs"""
1930 """calculate all the heads of the common ancestors of revs"""
1787 try:
1931 try:
1788 ancs = self.index.commonancestorsheads(*revs)
1932 ancs = self.index.commonancestorsheads(*revs)
1789 except (AttributeError, OverflowError): # C implementation failed
1933 except (AttributeError, OverflowError): # C implementation failed
1790 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1934 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1791 return ancs
1935 return ancs
1792
1936
1793 def isancestor(self, a, b):
1937 def isancestor(self, a, b):
1794 """return True if node a is an ancestor of node b
1938 """return True if node a is an ancestor of node b
1795
1939
1796 A revision is considered an ancestor of itself."""
1940 A revision is considered an ancestor of itself."""
1797 a, b = self.rev(a), self.rev(b)
1941 a, b = self.rev(a), self.rev(b)
1798 return self.isancestorrev(a, b)
1942 return self.isancestorrev(a, b)
1799
1943
1800 def isancestorrev(self, a, b):
1944 def isancestorrev(self, a, b):
1801 """return True if revision a is an ancestor of revision b
1945 """return True if revision a is an ancestor of revision b
1802
1946
1803 A revision is considered an ancestor of itself.
1947 A revision is considered an ancestor of itself.
1804
1948
1805 The implementation of this is trivial but the use of
1949 The implementation of this is trivial but the use of
1806 reachableroots is not."""
1950 reachableroots is not."""
1807 if a == nullrev:
1951 if a == nullrev:
1808 return True
1952 return True
1809 elif a == b:
1953 elif a == b:
1810 return True
1954 return True
1811 elif a > b:
1955 elif a > b:
1812 return False
1956 return False
1813 return bool(self.reachableroots(a, [b], [a], includepath=False))
1957 return bool(self.reachableroots(a, [b], [a], includepath=False))
1814
1958
1815 def reachableroots(self, minroot, heads, roots, includepath=False):
1959 def reachableroots(self, minroot, heads, roots, includepath=False):
1816 """return (heads(::(<roots> and <roots>::<heads>)))
1960 """return (heads(::(<roots> and <roots>::<heads>)))
1817
1961
1818 If includepath is True, return (<roots>::<heads>)."""
1962 If includepath is True, return (<roots>::<heads>)."""
1819 try:
1963 try:
1820 return self.index.reachableroots2(
1964 return self.index.reachableroots2(
1821 minroot, heads, roots, includepath
1965 minroot, heads, roots, includepath
1822 )
1966 )
1823 except AttributeError:
1967 except AttributeError:
1824 return dagop._reachablerootspure(
1968 return dagop._reachablerootspure(
1825 self.parentrevs, minroot, roots, heads, includepath
1969 self.parentrevs, minroot, roots, heads, includepath
1826 )
1970 )
1827
1971
1828 def ancestor(self, a, b):
1972 def ancestor(self, a, b):
1829 """calculate the "best" common ancestor of nodes a and b"""
1973 """calculate the "best" common ancestor of nodes a and b"""
1830
1974
1831 a, b = self.rev(a), self.rev(b)
1975 a, b = self.rev(a), self.rev(b)
1832 try:
1976 try:
1833 ancs = self.index.ancestors(a, b)
1977 ancs = self.index.ancestors(a, b)
1834 except (AttributeError, OverflowError):
1978 except (AttributeError, OverflowError):
1835 ancs = ancestor.ancestors(self.parentrevs, a, b)
1979 ancs = ancestor.ancestors(self.parentrevs, a, b)
1836 if ancs:
1980 if ancs:
1837 # choose a consistent winner when there's a tie
1981 # choose a consistent winner when there's a tie
1838 return min(map(self.node, ancs))
1982 return min(map(self.node, ancs))
1839 return self.nullid
1983 return self.nullid
1840
1984
1841 def _match(self, id):
1985 def _match(self, id):
1842 if isinstance(id, int):
1986 if isinstance(id, int):
1843 # rev
1987 # rev
1844 return self.node(id)
1988 return self.node(id)
1845 if len(id) == self.nodeconstants.nodelen:
1989 if len(id) == self.nodeconstants.nodelen:
1846 # possibly a binary node
1990 # possibly a binary node
1847 # odds of a binary node being all hex in ASCII are 1 in 10**25
1991 # odds of a binary node being all hex in ASCII are 1 in 10**25
1848 try:
1992 try:
1849 node = id
1993 node = id
1850 self.rev(node) # quick search the index
1994 self.rev(node) # quick search the index
1851 return node
1995 return node
1852 except error.LookupError:
1996 except error.LookupError:
1853 pass # may be partial hex id
1997 pass # may be partial hex id
1854 try:
1998 try:
1855 # str(rev)
1999 # str(rev)
1856 rev = int(id)
2000 rev = int(id)
1857 if b"%d" % rev != id:
2001 if b"%d" % rev != id:
1858 raise ValueError
2002 raise ValueError
1859 if rev < 0:
2003 if rev < 0:
1860 rev = len(self) + rev
2004 rev = len(self) + rev
1861 if rev < 0 or rev >= len(self):
2005 if rev < 0 or rev >= len(self):
1862 raise ValueError
2006 raise ValueError
1863 return self.node(rev)
2007 return self.node(rev)
1864 except (ValueError, OverflowError):
2008 except (ValueError, OverflowError):
1865 pass
2009 pass
1866 if len(id) == 2 * self.nodeconstants.nodelen:
2010 if len(id) == 2 * self.nodeconstants.nodelen:
1867 try:
2011 try:
1868 # a full hex nodeid?
2012 # a full hex nodeid?
1869 node = bin(id)
2013 node = bin(id)
1870 self.rev(node)
2014 self.rev(node)
1871 return node
2015 return node
1872 except (binascii.Error, error.LookupError):
2016 except (binascii.Error, error.LookupError):
1873 pass
2017 pass
1874
2018
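# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The str(rev) branch of ``_match`` above: an identifier that
# round-trips through int() is read as a revision number, with
# Python-style negative indexing. It raises ValueError for
# non-numeric input, which the caller catches (names are ours):
def _intrev_sketch(id, nrevs):
    rev = int(id)
    if b'%d' % rev != id:  # rejects b'007', b'+1', ...
        return None
    if rev < 0:
        rev += nrevs
    if 0 <= rev < nrevs:
        return rev
    return None

assert _intrev_sketch(b'-1', 10) == 9
assert _intrev_sketch(b'007', 10) is None
# --- end of sketch ---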
1875 def _partialmatch(self, id):
2019 def _partialmatch(self, id):
1876 # we don't care about wdirfilenodeids as they should always be full hashes
2020 # we don't care about wdirfilenodeids as they should always be full hashes
1877 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2021 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1878 ambiguous = False
2022 ambiguous = False
1879 try:
2023 try:
1880 partial = self.index.partialmatch(id)
2024 partial = self.index.partialmatch(id)
1881 if partial and self.hasnode(partial):
2025 if partial and self.hasnode(partial):
1882 if maybewdir:
2026 if maybewdir:
1883 # single 'ff...' match in radix tree, ambiguous with wdir
2027 # single 'ff...' match in radix tree, ambiguous with wdir
1884 ambiguous = True
2028 ambiguous = True
1885 else:
2029 else:
1886 return partial
2030 return partial
1887 elif maybewdir:
2031 elif maybewdir:
1888 # no 'ff...' match in radix tree, wdir identified
2032 # no 'ff...' match in radix tree, wdir identified
1889 raise error.WdirUnsupported
2033 raise error.WdirUnsupported
1890 else:
2034 else:
1891 return None
2035 return None
1892 except error.RevlogError:
2036 except error.RevlogError:
1893 # parsers.c radix tree lookup gave multiple matches
2037 # parsers.c radix tree lookup gave multiple matches
1894 # fast path: for unfiltered changelog, radix tree is accurate
2038 # fast path: for unfiltered changelog, radix tree is accurate
1895 if not getattr(self, 'filteredrevs', None):
2039 if not getattr(self, 'filteredrevs', None):
1896 ambiguous = True
2040 ambiguous = True
1897 # fall through to slow path that filters hidden revisions
2041 # fall through to slow path that filters hidden revisions
1898 except (AttributeError, ValueError):
2042 except (AttributeError, ValueError):
1899 # we are pure python, or key is not hex
2043 # we are pure python, or key is not hex
1900 pass
2044 pass
1901 if ambiguous:
2045 if ambiguous:
1902 raise error.AmbiguousPrefixLookupError(
2046 raise error.AmbiguousPrefixLookupError(
1903 id, self.display_id, _(b'ambiguous identifier')
2047 id, self.display_id, _(b'ambiguous identifier')
1904 )
2048 )
1905
2049
1906 if id in self._pcache:
2050 if id in self._pcache:
1907 return self._pcache[id]
2051 return self._pcache[id]
1908
2052
1909 if len(id) <= 40:
2053 if len(id) <= 40:
1910 # hex(node)[:...]
2054 # hex(node)[:...]
1911 l = len(id) // 2 * 2 # grab an even number of digits
2055 l = len(id) // 2 * 2 # grab an even number of digits
1912 try:
2056 try:
1913 # we're dropping the last digit, so let's check that it's hex,
2057 # we're dropping the last digit, so let's check that it's hex,
1914 # to avoid the expensive computation below if it's not
2058 # to avoid the expensive computation below if it's not
1915 if len(id) % 2 > 0:
2059 if len(id) % 2 > 0:
1916 if id[-1] not in hexdigits:
2060 if id[-1] not in hexdigits:
1917 return None
2061 return None
1918 prefix = bin(id[:l])
2062 prefix = bin(id[:l])
1919 except binascii.Error:
2063 except binascii.Error:
1920 pass
2064 pass
1921 else:
2065 else:
1922 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2066 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1923 nl = [
2067 nl = [
1924 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2068 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1925 ]
2069 ]
1926 if self.nodeconstants.nullhex.startswith(id):
2070 if self.nodeconstants.nullhex.startswith(id):
1927 nl.append(self.nullid)
2071 nl.append(self.nullid)
1928 if len(nl) > 0:
2072 if len(nl) > 0:
1929 if len(nl) == 1 and not maybewdir:
2073 if len(nl) == 1 and not maybewdir:
1930 self._pcache[id] = nl[0]
2074 self._pcache[id] = nl[0]
1931 return nl[0]
2075 return nl[0]
1932 raise error.AmbiguousPrefixLookupError(
2076 raise error.AmbiguousPrefixLookupError(
1933 id, self.display_id, _(b'ambiguous identifier')
2077 id, self.display_id, _(b'ambiguous identifier')
1934 )
2078 )
1935 if maybewdir:
2079 if maybewdir:
1936 raise error.WdirUnsupported
2080 raise error.WdirUnsupported
1937 return None
2081 return None
1938
2082
1939 def lookup(self, id):
2083 def lookup(self, id):
1940 """locate a node based on:
2084 """locate a node based on:
1941 - revision number or str(revision number)
2085 - revision number or str(revision number)
1942 - nodeid or subset of hex nodeid
2086 - nodeid or subset of hex nodeid
1943 """
2087 """
1944 n = self._match(id)
2088 n = self._match(id)
1945 if n is not None:
2089 if n is not None:
1946 return n
2090 return n
1947 n = self._partialmatch(id)
2091 n = self._partialmatch(id)
1948 if n:
2092 if n:
1949 return n
2093 return n
1950
2094
1951 raise error.LookupError(id, self.display_id, _(b'no match found'))
2095 raise error.LookupError(id, self.display_id, _(b'no match found'))
1952
2096
1953 def shortest(self, node, minlength=1):
2097 def shortest(self, node, minlength=1):
1954 """Find the shortest unambiguous prefix that matches node."""
2098 """Find the shortest unambiguous prefix that matches node."""
1955
2099
1956 def isvalid(prefix):
2100 def isvalid(prefix):
1957 try:
2101 try:
1958 matchednode = self._partialmatch(prefix)
2102 matchednode = self._partialmatch(prefix)
1959 except error.AmbiguousPrefixLookupError:
2103 except error.AmbiguousPrefixLookupError:
1960 return False
2104 return False
1961 except error.WdirUnsupported:
2105 except error.WdirUnsupported:
1962 # single 'ff...' match
2106 # single 'ff...' match
1963 return True
2107 return True
1964 if matchednode is None:
2108 if matchednode is None:
1965 raise error.LookupError(node, self.display_id, _(b'no node'))
2109 raise error.LookupError(node, self.display_id, _(b'no node'))
1966 return True
2110 return True
1967
2111
1968 def maybewdir(prefix):
2112 def maybewdir(prefix):
1969 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2113 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1970
2114
1971 hexnode = hex(node)
2115 hexnode = hex(node)
1972
2116
1973 def disambiguate(hexnode, minlength):
2117 def disambiguate(hexnode, minlength):
1974 """Disambiguate against wdirid."""
2118 """Disambiguate against wdirid."""
1975 for length in range(minlength, len(hexnode) + 1):
2119 for length in range(minlength, len(hexnode) + 1):
1976 prefix = hexnode[:length]
2120 prefix = hexnode[:length]
1977 if not maybewdir(prefix):
2121 if not maybewdir(prefix):
1978 return prefix
2122 return prefix
1979
2123
1980 if not getattr(self, 'filteredrevs', None):
2124 if not getattr(self, 'filteredrevs', None):
1981 try:
2125 try:
1982 length = max(self.index.shortest(node), minlength)
2126 length = max(self.index.shortest(node), minlength)
1983 return disambiguate(hexnode, length)
2127 return disambiguate(hexnode, length)
1984 except error.RevlogError:
2128 except error.RevlogError:
1985 if node != self.nodeconstants.wdirid:
2129 if node != self.nodeconstants.wdirid:
1986 raise error.LookupError(
2130 raise error.LookupError(
1987 node, self.display_id, _(b'no node')
2131 node, self.display_id, _(b'no node')
1988 )
2132 )
1989 except AttributeError:
2133 except AttributeError:
1990 # Fall through to pure code
2134 # Fall through to pure code
1991 pass
2135 pass
1992
2136
1993 if node == self.nodeconstants.wdirid:
2137 if node == self.nodeconstants.wdirid:
1994 for length in range(minlength, len(hexnode) + 1):
2138 for length in range(minlength, len(hexnode) + 1):
1995 prefix = hexnode[:length]
2139 prefix = hexnode[:length]
1996 if isvalid(prefix):
2140 if isvalid(prefix):
1997 return prefix
2141 return prefix
1998
2142
1999 for length in range(minlength, len(hexnode) + 1):
2143 for length in range(minlength, len(hexnode) + 1):
2000 prefix = hexnode[:length]
2144 prefix = hexnode[:length]
2001 if isvalid(prefix):
2145 if isvalid(prefix):
2002 return disambiguate(hexnode, length)
2146 return disambiguate(hexnode, length)
2003
2147
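# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The fallback loop above reduced to its essence (ignoring the wdir
# ambiguity handling): grow the prefix until it matches exactly one
# known hex node.
def _shortest_sketch(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(n.startswith(prefix) for n in hexnodes) == 1:
            return prefix

_nodes = ['d4e1f5', 'd4c2a9', '0f3b77']
assert _shortest_sketch(_nodes, 'd4e1f5') == 'd4e'
# --- end of sketch ---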
2004 def cmp(self, node, text):
2148 def cmp(self, node, text):
2005 """compare text with a given file revision
2149 """compare text with a given file revision
2006
2150
2007 returns True if text is different from what is stored.
2151 returns True if text is different from what is stored.
2008 """
2152 """
2009 p1, p2 = self.parents(node)
2153 p1, p2 = self.parents(node)
2010 return storageutil.hashrevisionsha1(text, p1, p2) != node
2154 return storageutil.hashrevisionsha1(text, p1, p2) != node
2011
2155
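# --- editor's note: an illustrative sketch, not part of revlog.py ---
# ``cmp`` works because a node is a hash over both parents and the
# text. A minimal restatement of that scheme as we understand it (the
# two parent nodes in sorted order, then the text; any metadata
# handling in the real ``storageutil.hashrevisionsha1`` is ignored):
import hashlib

def _hashrevision_sketch(text, p1, p2):
    a, b = sorted([p1, p2])
    return hashlib.sha1(a + b + text).digest()

_null = b'\0' * 20
_node = _hashrevision_sketch(b'content', _null, _null)
assert _hashrevision_sketch(b'content', _null, _null) == _node
assert _hashrevision_sketch(b'changed', _null, _null) != _node
# --- end of sketch ---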
2012 def _getsegmentforrevs(self, startrev, endrev):
2156 def _getsegmentforrevs(self, startrev, endrev):
2013 """Obtain a segment of raw data corresponding to a range of revisions.
2157 """Obtain a segment of raw data corresponding to a range of revisions.
2014
2158
2015 Accepts the start and end revisions.
2159 Accepts the start and end revisions.
2018
2162
2019 Requests for data may be satisfied by a cache.
2163 Requests for data may be satisfied by a cache.
2020
2164
2021 Returns a 2-tuple of (offset, data) for the requested range of
2165 Returns a 2-tuple of (offset, data) for the requested range of
2022 revisions. Offset is the integer offset from the beginning of the
2166 revisions. Offset is the integer offset from the beginning of the
2023 revlog and data is a str or buffer of the raw byte data.
2167 revlog and data is a str or buffer of the raw byte data.
2024
2168
2025 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2169 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
2026 to determine where each revision's data begins and ends.
2170 to determine where each revision's data begins and ends.
2027 """
2171 """
2028 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2172 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
2029 # (functions are expensive).
2173 # (functions are expensive).
2030 index = self.index
2174 index = self.index
2031 istart = index[startrev]
2175 istart = index[startrev]
2032 start = int(istart[0] >> 16)
2176 start = int(istart[0] >> 16)
2033 if startrev == endrev:
2177 if startrev == endrev:
2034 end = start + istart[1]
2178 end = start + istart[1]
2035 else:
2179 else:
2036 iend = index[endrev]
2180 iend = index[endrev]
2037 end = int(iend[0] >> 16) + iend[1]
2181 end = int(iend[0] >> 16) + iend[1]
2038
2182
2039 if self._inline:
2183 if self._inline:
2040 start += (startrev + 1) * self.index.entry_size
2184 start += (startrev + 1) * self.index.entry_size
2041 end += (endrev + 1) * self.index.entry_size
2185 end += (endrev + 1) * self.index.entry_size
2042 length = end - start
2186 length = end - start
2043
2187
2044 return start, self._segmentfile.read_chunk(start, length)
2188 return start, self._inner._segmentfile.read_chunk(start, length)
2045
2189
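# --- editor's note: an illustrative sketch, not part of revlog.py ---
# The inline adjustment above: in an inline revlog, index entries and
# data chunks are interleaved in one file, so a data offset has to
# skip the index entry of every revision written up to and including
# the one being read, hence the ``rev + 1`` multiplier.
def _inline_start_sketch(data_offset, rev, entry_size):
    return data_offset + (rev + 1) * entry_size

# with a hypothetical 64-byte entry size, the data of rev 0 starts
# right after rev 0's own index entry
assert _inline_start_sketch(0, 0, 64) == 64
# --- end of sketch ---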
2046 def _chunk(self, rev):
2190 def _chunk(self, rev):
2047 """Obtain a single decompressed chunk for a revision.
2191 """Obtain a single decompressed chunk for a revision.
2048
2192
2049 Accepts an integer revision.
2193 Accepts an integer revision.
2052
2196
2053 Returns a str holding uncompressed data for the requested revision.
2197 Returns a str holding uncompressed data for the requested revision.
2054 """
2198 """
2055 compression_mode = self.index[rev][10]
2199 compression_mode = self.index[rev][10]
2056 data = self._getsegmentforrevs(rev, rev)[1]
2200 data = self._getsegmentforrevs(rev, rev)[1]
2057 if compression_mode == COMP_MODE_PLAIN:
2201 if compression_mode == COMP_MODE_PLAIN:
2058 return data
2202 return data
2059 elif compression_mode == COMP_MODE_DEFAULT:
2203 elif compression_mode == COMP_MODE_DEFAULT:
2060 return self._decompressor(data)
2204 return self._decompressor(data)
2061 elif compression_mode == COMP_MODE_INLINE:
2205 elif compression_mode == COMP_MODE_INLINE:
2062 return self.decompress(data)
2206 return self.decompress(data)
2063 else:
2207 else:
2064 msg = b'unknown compression mode %d'
2208 msg = b'unknown compression mode %d'
2065 msg %= compression_mode
2209 msg %= compression_mode
2066 raise error.RevlogError(msg)
2210 raise error.RevlogError(msg)
2067
2211
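Note: position 10 of an index entry records how the chunk was stored: plain bytes, compressed with the revlog's default compressor, or prefixed with an inline header byte that names its own codec. A hedged sketch of the same dispatch as a table; the constants and handlers are stand-ins for the real COMP_MODE_* values and decompressors:

    # Illustrative dispatch over storage modes (stand-in constants).
    COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

    def decode_chunk(mode, data, default_decompress, inline_decompress):
        handlers = {
            COMP_MODE_PLAIN: lambda d: d,           # stored as-is
            COMP_MODE_DEFAULT: default_decompress,  # docket-level compressor
            COMP_MODE_INLINE: inline_decompress,    # header byte selects codec
        }
        if mode not in handlers:
            raise ValueError('unknown compression mode %d' % mode)
        return handlers[mode](data)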
    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

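Note: the speedup over repeated `_chunk()` calls comes from issuing one contiguous read per slice and then carving each revision out of it with zero-copy buffer views at `chunkstart - offset`. A small sketch of that carving step over an in-memory segment (the offsets are invented for illustration):

    # Sketch: slice several revisions out of one contiguous read.
    segment_offset = 100        # file offset where the segment begins
    segment = bytes(range(50))  # pretend raw bytes read from the revlog
    revs = [(100, 10), (110, 20), (130, 15)]  # (chunkstart, chunklength)

    chunks = [
        memoryview(segment)[start - segment_offset:start - segment_offset + length]
        for start, length in revs
    ]
    assert bytes(chunks[1]) == segment[10:30]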
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

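Note: a snapshot is a revision whose delta chain does not run through its own parents: it either stores full text (its base is itself or nullrev) or deltas against another snapshot. A toy model of walking a delta chain down to its full-text base:

    # Toy model of a delta chain, independent of the real index layout.
    # delta_base[r] == r means revision r stores a full snapshot.
    delta_base = {0: 0, 1: 0, 2: 1, 3: 3, 4: 3}  # invented example chain

    def chain_to_base(rev):
        chain = [rev]
        while delta_base[rev] != rev:
            rev = delta_base[rev]
            chain.append(rev)
        return chain  # the last element stores full text

    assert chain_to_base(2) == [2, 1, 0]
    assert chain_to_base(4) == [4, 3]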
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flag for this revision
        # (usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

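Note: `mdiff.patches` rebuilds the text by applying every delta in the chain, oldest first, on top of the base. A sketch with a simplified delta representation ((offset, length, replacement) tuples) standing in for mdiff's binary patch format:

    # Sketch of delta-chain reconstruction with a toy delta format.
    def apply_delta(text, delta):
        out, pos = [], 0
        for offset, length, data in delta:
            out.append(text[pos:offset])  # keep unchanged prefix
            out.append(data)              # splice in replacement bytes
            pos = offset + length         # skip the replaced region
        out.append(text[pos:])
        return b''.join(out)

    base = b'abcdef'
    chain = [[(2, 2, b'XY')], [(0, 1, b'z')]]  # two stacked deltas
    text = base
    for delta in chain:
        text = apply_delta(text, delta)
    assert text == b'zbXYef'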
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if self._docket.sidedata_end < sidedata_offset + sidedata_size:
            filename = self._sidedatafile
            end = self._docket.sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

-       comp_segment = self._segmentfile_sidedata.read_chunk(
+       comp_segment = self._inner._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev):
        """return an uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

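Note: `storageutil.hashrevisionsha1` derives the node from the parents plus the text: the two parent nodes are fed to SHA-1 in sorted byte order, followed by the revision text, so identical text with different parentage yields a different node. A sketch of that scheme (illustrative, not the module's actual code):

    import hashlib

    def hash_revision_sha1(text, p1, p2):
        a, b = sorted((p1, p2))  # parents ordered by raw byte value
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    nullid = b'\0' * 20
    node = hash_revision_sha1(b'file content', nullid, nullid)
    assert len(node) == 20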
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' prefix to the ``data/`` or ``meta/`` base
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

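Note: a worked example of the path rule above, with invented radix values:

    # Mirror of _split_index_file's path rule (example radixes only).
    def split_index_path(radix):
        parts = radix.split(b'/')
        if len(parts) > 1:
            head, tail = parts[0] + b'-s', parts[-1] + b'.i'
            return b'/'.join([head] + parts[1:-1] + [tail])
        return radix + b'.i.s'

    assert split_index_path(b'data/some/dir/file.txt') == b'data-s/some/dir/file.txt.i'
    assert split_index_path(b'00changelog') == b'00changelog.i.s'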
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        existing_handles = False
-       if self._writinghandles is not None:
+       if self._inner._writinghandles is not None:
            existing_handles = True
-           fp = self._writinghandles[0]
+           fp = self._inner._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
-           self._writinghandles = None
-           self._segmentfile.writing_handle = None
+           self._inner._writinghandles = None
+           self._inner._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
+                   maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
+                   maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r)[1])
                new_dfh.flush()

            if side_write:
                self._indexfile = new_index_file_path
-               with self.__index_new_fp() as fp:
+               self._inner.index_file = self._indexfile
+               with self._inner._InnerRevlog__index_new_fp() as fp:
                    self._format_flags &= ~FLAG_INLINE_DATA
                    self._inline = False
+                   self._inner.inline = False
                    for i in self:
                        e = self.index.entry_binary(i)
                        if i == 0 and self._docket is None:
                            header = self._format_flags | self._format_version
                            header = self.index.pack_header(header)
                            e = header + e
                        fp.write(e)
                    if self._docket is not None:
                        self._docket.index_end = fp.tell()

                # If we don't use side-write, the temp file replaces the real
                # index when we exit the context manager

            nodemaputil.setup_persistent_nodemap(tr, self)
-           self._segmentfile = randomaccessfile.randomaccessfile(
+           self._inner._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self._datafile,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional, reopen the index
-               ifh = self.__index_write_fp()
-               self._writinghandles = (ifh, new_dfh, None)
-               self._segmentfile.writing_handle = new_dfh
+               index_end = None
+               if self._docket is not None:
+                   index_end = self._docket.index_end
+               ifh = self._inner._InnerRevlog__index_write_fp(
+                   index_end=index_end
+               )
+               self._inner._writinghandles = (ifh, new_dfh, None)
+               self._inner._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
-       """Context manager that keeps data and sidedata files open for reading"""
-       if len(self.index) == 0:
-           yield  # nothing to be read
-       else:
-           with self._segmentfile.reading():
-               with self._segmentfile_sidedata.reading():
-                   yield
+       with self._inner.reading():
+           yield

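Note: this rewrite is the commit's theme in miniature: `revlog` no longer juggles the data/sidedata file handles itself and instead forwards to its `_InnerRevlog`. A generic sketch of that delegation pattern with context managers (the names are illustrative, not the real `_InnerRevlog` API):

    import contextlib

    class Inner:
        # stands in for _InnerRevlog, which owns the segment files
        @contextlib.contextmanager
        def reading(self):
            # open file handles here ...
            try:
                yield
            finally:
                pass  # ... and close them here

    class Outer:
        # stands in for revlog: it only forwards to the inner object
        def __init__(self):
            self._inner = Inner()

        @contextlib.contextmanager
        def reading(self):
            with self._inner.reading():
                yield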
    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
-       if self._writinghandles is not None:
+       if self._inner.is_writing:
            yield
        else:
-           ifh = dfh = sdfh = None
-           try:
-               r = len(self)
-               # opening the data file.
-               dsize = 0
-               if r:
-                   dsize = self.end(r - 1)
-               dfh = None
-               if not self._inline:
-                   try:
-                       dfh = self._datafp(b"r+")
-                       if self._docket is None:
-                           dfh.seek(0, os.SEEK_END)
-                       else:
-                           dfh.seek(self._docket.data_end, os.SEEK_SET)
-                   except FileNotFoundError:
-                       dfh = self._datafp(b"w+")
-                   transaction.add(self._datafile, dsize)
-               if self._sidedatafile is not None:
-                   # revlog-v2 does not inline, help Pytype
-                   assert dfh is not None
-                   try:
-                       sdfh = self.opener(self._sidedatafile, mode=b"r+")
-                       dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
-                   except FileNotFoundError:
-                       sdfh = self.opener(self._sidedatafile, mode=b"w+")
-                       transaction.add(
-                           self._sidedatafile, self._docket.sidedata_end
-                       )
-
-               # opening the index file.
-               isize = r * self.index.entry_size
-               ifh = self.__index_write_fp()
-               if self._inline:
-                   transaction.add(self._indexfile, dsize + isize)
-               else:
-                   transaction.add(self._indexfile, isize)
-               # exposing all file handle for writing.
-               self._writinghandles = (ifh, dfh, sdfh)
-               self._segmentfile.writing_handle = ifh if self._inline else dfh
-               self._segmentfile_sidedata.writing_handle = sdfh
-               yield
-               if self._docket is not None:
-                   self._write_docket(transaction)
-           finally:
-               self._writinghandles = None
-               self._segmentfile.writing_handle = None
-               self._segmentfile_sidedata.writing_handle = None
-               if dfh is not None:
-                   dfh.close()
-               if sdfh is not None:
-                   sdfh.close()
-               # closing the index file last to avoid exposing referent to
-               # potential unflushed data content.
-               if ifh is not None:
-                   ifh.close()
+           data_end = None
+           sidedata_end = None
+           if self._docket is not None:
+               data_end = self._docket.data_end
+               sidedata_end = self._docket.sidedata_end
+           with self._inner.writing(
+               transaction,
+               data_end=data_end,
+               sidedata_end=sidedata_end,
+           ):
+               yield
+               if self._docket is not None:
+                   self._write_docket(transaction)

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help changelog to implement transaction logic

        We could also imagine using the same transaction logic for all revlog
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog who don't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

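Note: for orientation, a hedged sketch of a call site; it assumes an open revlog `rl`, a live transaction `tr`, and a `linkrev` computed by the caller, and uses `rl.nullid` for a parentless first revision:

    # Hypothetical usage, not taken from Mercurial's test suite.
    node = rl.addrevision(
        b'revision text',  # text
        tr,                # transaction used for rollback
        linkrev,           # link revision (assumed defined by the caller)
        rl.nullid,         # p1: no first parent
        rl.nullid,         # p2: no second parent
    )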
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

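Note: the pair returned above is (header, payload): b'' means the payload is self-identifying (it carries a compressor header, or starts with '\0'), while b'u' marks data stored uncompressed. A sketch of a reader honoring that convention, with zlib standing in for the configured compressor:

    import zlib

    def read_possibly_compressed(header, payload):
        # mirrors the (header, data) pairs produced by compress() above
        if header == b'u':
            return payload               # stored verbatim
        if payload[:1] == b'\0':
            return payload               # plain data, self-identifying
        return zlib.decompress(payload)  # assume zlib ('x' header) here

    assert read_possibly_compressed(b'u', b'hello') == b'hello'
    assert read_possibly_compressed(b'', zlib.compress(b'hello')) == b'hello'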
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
-       if self._writinghandles is None:
+       if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
-           ifh, dfh, sdfh = self._writinghandles
+           ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

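Note: the rank of a revision is the size of its ancestor set, itself included, which is why a root gets rank 1 and a linear child gets 1 + rank(parent); for a merge, the code starts from one parent's rank and adds the ancestors reachable only from the other parent. A toy verification of that identity on a hand-built DAG:

    # Toy DAG: parents[r] lists the parents of revision r (invented data).
    parents = {0: [], 1: [0], 2: [0], 3: [1, 2]}

    def ancestors(rev):
        """All ancestors of `rev`, including `rev` itself."""
        seen, stack = {rev}, [rev]
        while stack:
            for p in parents[stack.pop()]:
                if p not in seen:
                    seen.add(p)
                    stack.append(p)
        return seen

    def rank(rev):
        return len(ancestors(rev))  # rank = size of the ancestor set

    # merge identity: one parent's rank plus the ancestors missing from it
    pmin, pmax = sorted((1, 2))
    assert rank(3) == 1 + rank(pmax) + len(ancestors(pmin) - ancestors(pmax))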
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this O(1), revlog v2 needs a docket
        file to store that information: since sidedata can be rewritten to the
        end of the data file within a transaction, you can have cases where, for
        example, rev `n` does not have sidedata while rev `n - 1` does, leading
        to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
-       if self._writinghandles is None:
+       if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
-       ifh, dfh, sdfh = self._writinghandles
+       ifh, dfh, sdfh = self._inner._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
3144 offset += curr * self.index.entry_size
3040 transaction.add(self._indexfile, offset)
3145 transaction.add(self._indexfile, offset)
3041 ifh.write(entry)
3146 ifh.write(entry)
3042 ifh.write(data[0])
3147 ifh.write(data[0])
3043 ifh.write(data[1])
3148 ifh.write(data[1])
3044 assert not sidedata
3149 assert not sidedata
3045 self._enforceinlinesize(transaction)
3150 self._enforceinlinesize(transaction)
3046 if self._docket is not None:
3151 if self._docket is not None:
3047 # revlog-v2 always has 3 writing handles, help Pytype
3152 # revlog-v2 always has 3 writing handles, help Pytype
3048 wh1 = self._writinghandles[0]
3153 wh1 = self._inner._writinghandles[0]
3049 wh2 = self._writinghandles[1]
3154 wh2 = self._inner._writinghandles[1]
3050 wh3 = self._writinghandles[2]
3155 wh3 = self._inner._writinghandles[2]
3051 assert wh1 is not None
3156 assert wh1 is not None
3052 assert wh2 is not None
3157 assert wh2 is not None
3053 assert wh3 is not None
3158 assert wh3 is not None
3054 self._docket.index_end = wh1.tell()
3159 self._docket.index_end = wh1.tell()
3055 self._docket.data_end = wh2.tell()
3160 self._docket.data_end = wh2.tell()
3056 self._docket.sidedata_end = wh3.tell()
3161 self._docket.sidedata_end = wh3.tell()
3057
3162
3058 nodemaputil.setup_persistent_nodemap(transaction, self)
3163 nodemaputil.setup_persistent_nodemap(transaction, self)
3059
3164
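The platform note at the top of `_writeentry` can be reproduced in isolation. Below is a minimal sketch of the same seek-before-write discipline on an 'a+b' handle; the temporary file and byte payloads are made up for illustration:

import os
import tempfile

fd, path = tempfile.mkstemp()
os.close(fd)
with open(path, 'wb') as fh:
    fh.write(b'index-entry-0')

with open(path, 'a+b') as fh:
    fh.seek(0)
    fh.read(5)               # the handle is now positioned mid-file
    fh.seek(0, os.SEEK_END)  # explicit seek, as _writeentry inserts
    fh.write(b'|index-entry-1')

with open(path, 'rb') as fh:
    assert fh.read() == b'index-entry-0|index-entry-1'
os.unlink(path)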
3060 def addgroup(
3165 def addgroup(
3061 self,
3166 self,
3062 deltas,
3167 deltas,
3063 linkmapper,
3168 linkmapper,
3064 transaction,
3169 transaction,
3065 alwayscache=False,
3170 alwayscache=False,
3066 addrevisioncb=None,
3171 addrevisioncb=None,
3067 duplicaterevisioncb=None,
3172 duplicaterevisioncb=None,
3068 debug_info=None,
3173 debug_info=None,
3069 delta_base_reuse_policy=None,
3174 delta_base_reuse_policy=None,
3070 ):
3175 ):
3071 """
3176 """
3072 add a delta group
3177 add a delta group
3073
3178
3074 Given a set of deltas, add them to the revision log. The
3179 Given a set of deltas, add them to the revision log. The
3075 first delta is against its parent, which should be in our
3180 first delta is against its parent, which should be in our
3076 log; the rest are against the previous delta.
3181 log; the rest are against the previous delta.
3077
3182
3078 If ``addrevisioncb`` is defined, it will be called with arguments of
3183 If ``addrevisioncb`` is defined, it will be called with arguments of
3079 this revlog and the revision number that was added.
3184 this revlog and the revision number that was added.
3080 """
3185 """
3081
3186
3082 if self._adding_group:
3187 if self._adding_group:
3083 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3188 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3084
3189
3085 # read the default delta-base reuse policy from revlog config if the
3190 # read the default delta-base reuse policy from revlog config if the
3086 # group did not specify one.
3191 # group did not specify one.
3087 if delta_base_reuse_policy is None:
3192 if delta_base_reuse_policy is None:
3088 if (
3193 if (
3089 self.delta_config.general_delta
3194 self.delta_config.general_delta
3090 and self.delta_config.lazy_delta_base
3195 and self.delta_config.lazy_delta_base
3091 ):
3196 ):
3092 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3197 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3093 else:
3198 else:
3094 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3199 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3095
3200
3096 self._adding_group = True
3201 self._adding_group = True
3097 empty = True
3202 empty = True
3098 try:
3203 try:
3099 with self._writing(transaction):
3204 with self._writing(transaction):
3100 write_debug = None
3205 write_debug = None
3101 if self.delta_config.debug_delta:
3206 if self.delta_config.debug_delta:
3102 write_debug = transaction._report
3207 write_debug = transaction._report
3103 deltacomputer = deltautil.deltacomputer(
3208 deltacomputer = deltautil.deltacomputer(
3104 self,
3209 self,
3105 write_debug=write_debug,
3210 write_debug=write_debug,
3106 debug_info=debug_info,
3211 debug_info=debug_info,
3107 )
3212 )
3108 # loop through our set of deltas
3213 # loop through our set of deltas
3109 for data in deltas:
3214 for data in deltas:
3110 (
3215 (
3111 node,
3216 node,
3112 p1,
3217 p1,
3113 p2,
3218 p2,
3114 linknode,
3219 linknode,
3115 deltabase,
3220 deltabase,
3116 delta,
3221 delta,
3117 flags,
3222 flags,
3118 sidedata,
3223 sidedata,
3119 ) = data
3224 ) = data
3120 link = linkmapper(linknode)
3225 link = linkmapper(linknode)
3121 flags = flags or REVIDX_DEFAULT_FLAGS
3226 flags = flags or REVIDX_DEFAULT_FLAGS
3122
3227
3123 rev = self.index.get_rev(node)
3228 rev = self.index.get_rev(node)
3124 if rev is not None:
3229 if rev is not None:
3125 # this can happen if two branches make the same change
3230 # this can happen if two branches make the same change
3126 self._nodeduplicatecallback(transaction, rev)
3231 self._nodeduplicatecallback(transaction, rev)
3127 if duplicaterevisioncb:
3232 if duplicaterevisioncb:
3128 duplicaterevisioncb(self, rev)
3233 duplicaterevisioncb(self, rev)
3129 empty = False
3234 empty = False
3130 continue
3235 continue
3131
3236
3132 for p in (p1, p2):
3237 for p in (p1, p2):
3133 if not self.index.has_node(p):
3238 if not self.index.has_node(p):
3134 raise error.LookupError(
3239 raise error.LookupError(
3135 p, self.radix, _(b'unknown parent')
3240 p, self.radix, _(b'unknown parent')
3136 )
3241 )
3137
3242
3138 if not self.index.has_node(deltabase):
3243 if not self.index.has_node(deltabase):
3139 raise error.LookupError(
3244 raise error.LookupError(
3140 deltabase, self.display_id, _(b'unknown delta base')
3245 deltabase, self.display_id, _(b'unknown delta base')
3141 )
3246 )
3142
3247
3143 baserev = self.rev(deltabase)
3248 baserev = self.rev(deltabase)
3144
3249
3145 if baserev != nullrev and self.iscensored(baserev):
3250 if baserev != nullrev and self.iscensored(baserev):
3146 # if base is censored, delta must be full replacement in a
3251 # if base is censored, delta must be full replacement in a
3147 # single patch operation
3252 # single patch operation
3148 hlen = struct.calcsize(b">lll")
3253 hlen = struct.calcsize(b">lll")
3149 oldlen = self.rawsize(baserev)
3254 oldlen = self.rawsize(baserev)
3150 newlen = len(delta) - hlen
3255 newlen = len(delta) - hlen
3151 if delta[:hlen] != mdiff.replacediffheader(
3256 if delta[:hlen] != mdiff.replacediffheader(
3152 oldlen, newlen
3257 oldlen, newlen
3153 ):
3258 ):
3154 raise error.CensoredBaseError(
3259 raise error.CensoredBaseError(
3155 self.display_id, self.node(baserev)
3260 self.display_id, self.node(baserev)
3156 )
3261 )
3157
3262
3158 if not flags and self._peek_iscensored(baserev, delta):
3263 if not flags and self._peek_iscensored(baserev, delta):
3159 flags |= REVIDX_ISCENSORED
3264 flags |= REVIDX_ISCENSORED
3160
3265
3161 # We assume consumers of addrevisioncb will want to retrieve
3266 # We assume consumers of addrevisioncb will want to retrieve
3162 # the added revision, which will require a call to
3267 # the added revision, which will require a call to
3163 # revision(). revision() will fast path if there is a cache
3268 # revision(). revision() will fast path if there is a cache
3164 # hit. So, we tell _addrevision() to always cache in this case.
3269 # hit. So, we tell _addrevision() to always cache in this case.
3165 # We're only using addgroup() in the context of changegroup
3270 # We're only using addgroup() in the context of changegroup
3166 # generation so the revision data can always be handled as raw
3271 # generation so the revision data can always be handled as raw
3167 # by the flagprocessor.
3272 # by the flagprocessor.
3168 rev = self._addrevision(
3273 rev = self._addrevision(
3169 node,
3274 node,
3170 None,
3275 None,
3171 transaction,
3276 transaction,
3172 link,
3277 link,
3173 p1,
3278 p1,
3174 p2,
3279 p2,
3175 flags,
3280 flags,
3176 (baserev, delta, delta_base_reuse_policy),
3281 (baserev, delta, delta_base_reuse_policy),
3177 alwayscache=alwayscache,
3282 alwayscache=alwayscache,
3178 deltacomputer=deltacomputer,
3283 deltacomputer=deltacomputer,
3179 sidedata=sidedata,
3284 sidedata=sidedata,
3180 )
3285 )
3181
3286
3182 if addrevisioncb:
3287 if addrevisioncb:
3183 addrevisioncb(self, rev)
3288 addrevisioncb(self, rev)
3184 empty = False
3289 empty = False
3185 finally:
3290 finally:
3186 self._adding_group = False
3291 self._adding_group = False
3187 return not empty
3292 return not empty
3188
3293
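To make the duplicate handling in the loop above concrete, here is a toy model using plain containers instead of a real index; the tuple shape matches the unpacking in the loop:

known = {b'node-A'}
deltas = [
    # (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    (b'node-A', b'p1', b'p2', b'link-A', b'base', b'...', 0, {}),
    (b'node-B', b'p1', b'p2', b'link-B', b'base', b'...', 0, {}),
]
empty = True
for node, p1, p2, linknode, deltabase, delta, flags, sidedata in deltas:
    if node in known:
        empty = False            # duplicate: reported, not re-added
        continue
    known.add(node)              # stand-in for _addrevision(...)
    empty = False
assert not empty and b'node-B' in known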
3189 def iscensored(self, rev):
3294 def iscensored(self, rev):
3190 """Check if a file revision is censored."""
3295 """Check if a file revision is censored."""
3191 if not self.feature_config.censorable:
3296 if not self.feature_config.censorable:
3192 return False
3297 return False
3193
3298
3194 return self.flags(rev) & REVIDX_ISCENSORED
3299 return self.flags(rev) & REVIDX_ISCENSORED
3195
3300
3196 def _peek_iscensored(self, baserev, delta):
3301 def _peek_iscensored(self, baserev, delta):
3197 """Quickly check if a delta produces a censored revision."""
3302 """Quickly check if a delta produces a censored revision."""
3198 if not self.feature_config.censorable:
3303 if not self.feature_config.censorable:
3199 return False
3304 return False
3200
3305
3201 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3306 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3202
3307
3203 def getstrippoint(self, minlink):
3308 def getstrippoint(self, minlink):
3204 """find the minimum rev that must be stripped to strip the linkrev
3309 """find the minimum rev that must be stripped to strip the linkrev
3205
3310
3206 Returns a tuple containing the minimum rev and a set of all revs that
3311 Returns a tuple containing the minimum rev and a set of all revs that
3207 have linkrevs that will be broken by this strip.
3312 have linkrevs that will be broken by this strip.
3208 """
3313 """
3209 return storageutil.resolvestripinfo(
3314 return storageutil.resolvestripinfo(
3210 minlink,
3315 minlink,
3211 len(self) - 1,
3316 len(self) - 1,
3212 self.headrevs(),
3317 self.headrevs(),
3213 self.linkrev,
3318 self.linkrev,
3214 self.parentrevs,
3319 self.parentrevs,
3215 )
3320 )
3216
3321
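For the common case where linkrevs are non-decreasing (revisions appended in changelog order), the strip point described above reduces to a bisect. A toy sketch follows; the full implementation in storageutil.resolvestripinfo also reports the broken linkrevs for non-monotonic histories:

import bisect

linkrevs = [0, 2, 2, 5, 7]   # toy linkrev per revision
minlink = 5
strippoint = bisect.bisect_left(linkrevs, minlink)
assert strippoint == 3       # revs 3..tip must be stripped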
3217 def strip(self, minlink, transaction):
3322 def strip(self, minlink, transaction):
3218 """truncate the revlog on the first revision with a linkrev >= minlink
3323 """truncate the revlog on the first revision with a linkrev >= minlink
3219
3324
3220 This function is called when we're stripping revision minlink and
3325 This function is called when we're stripping revision minlink and
3221 its descendants from the repository.
3326 its descendants from the repository.
3222
3327
3223 We have to remove all revisions with linkrev >= minlink, because
3328 We have to remove all revisions with linkrev >= minlink, because
3224 the equivalent changelog revisions will be renumbered after the
3329 the equivalent changelog revisions will be renumbered after the
3225 strip.
3330 strip.
3226
3331
3227 So we truncate the revlog on the first of these revisions, and
3332 So we truncate the revlog on the first of these revisions, and
3228 trust that the caller has saved the revisions that shouldn't be
3333 trust that the caller has saved the revisions that shouldn't be
3229 removed and that it'll re-add them after this truncation.
3334 removed and that it'll re-add them after this truncation.
3230 """
3335 """
3231 if len(self) == 0:
3336 if len(self) == 0:
3232 return
3337 return
3233
3338
3234 rev, _ = self.getstrippoint(minlink)
3339 rev, _ = self.getstrippoint(minlink)
3235 if rev == len(self):
3340 if rev == len(self):
3236 return
3341 return
3237
3342
3238 # first truncate the files on disk
3343 # first truncate the files on disk
3239 data_end = self.start(rev)
3344 data_end = self.start(rev)
3240 if not self._inline:
3345 if not self._inline:
3241 transaction.add(self._datafile, data_end)
3346 transaction.add(self._datafile, data_end)
3242 end = rev * self.index.entry_size
3347 end = rev * self.index.entry_size
3243 else:
3348 else:
3244 end = data_end + (rev * self.index.entry_size)
3349 end = data_end + (rev * self.index.entry_size)
3245
3350
3246 if self._sidedatafile:
3351 if self._sidedatafile:
3247 sidedata_end = self.sidedata_cut_off(rev)
3352 sidedata_end = self.sidedata_cut_off(rev)
3248 transaction.add(self._sidedatafile, sidedata_end)
3353 transaction.add(self._sidedatafile, sidedata_end)
3249
3354
3250 transaction.add(self._indexfile, end)
3355 transaction.add(self._indexfile, end)
3251 if self._docket is not None:
3356 if self._docket is not None:
3252 # XXX we could leverage the docket while stripping. However it is
3357 # XXX we could leverage the docket while stripping. However it is
3253 # not powerful enough at the time of this comment
3358 # not powerful enough at the time of this comment
3254 self._docket.index_end = end
3359 self._docket.index_end = end
3255 self._docket.data_end = data_end
3360 self._docket.data_end = data_end
3256 self._docket.sidedata_end = sidedata_end
3361 self._docket.sidedata_end = sidedata_end
3257 self._docket.write(transaction, stripping=True)
3362 self._docket.write(transaction, stripping=True)
3258
3363
3259 # then reset internal state in memory to forget those revisions
3364 # then reset internal state in memory to forget those revisions
3260 self._revisioncache = None
3365 self._revisioncache = None
3261 self._chaininfocache = util.lrucachedict(500)
3366 self._chaininfocache = util.lrucachedict(500)
3262 self._segmentfile.clear_cache()
3367 self._inner._segmentfile.clear_cache()
3263 self._segmentfile_sidedata.clear_cache()
3368 self._inner._segmentfile_sidedata.clear_cache()
3264
3369
3265 del self.index[rev:-1]
3370 del self.index[rev:-1]
3266
3371
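The truncation offsets computed above differ for inline and non-inline revlogs; a small arithmetic check, with illustrative entry size and offsets:

entry_size = 64
rev, data_end = 5, 4096        # first rev to strip / start of its data

# separate data file: index is truncated after `rev` fixed-size entries
non_inline_index_end = rev * entry_size
# inline revlog: index entries and data share one file, so both add up
inline_index_end = data_end + rev * entry_size

assert non_inline_index_end == 320
assert inline_index_end == 4416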
3267 def checksize(self):
3372 def checksize(self):
3268 """Check size of index and data files
3373 """Check size of index and data files
3269
3374
3270 return a (dd, di) tuple.
3375 return a (dd, di) tuple.
3271 - dd: extra bytes for the "data" file
3376 - dd: extra bytes for the "data" file
3272 - di: extra bytes for the "index" file
3377 - di: extra bytes for the "index" file
3273
3378
3274 A healthy revlog will return (0, 0).
3379 A healthy revlog will return (0, 0).
3275 """
3380 """
3276 expected = 0
3381 expected = 0
3277 if len(self):
3382 if len(self):
3278 expected = max(0, self.end(len(self) - 1))
3383 expected = max(0, self.end(len(self) - 1))
3279
3384
3280 try:
3385 try:
3281 with self._datafp() as f:
3386 with self._datafp() as f:
3282 f.seek(0, io.SEEK_END)
3387 f.seek(0, io.SEEK_END)
3283 actual = f.tell()
3388 actual = f.tell()
3284 dd = actual - expected
3389 dd = actual - expected
3285 except FileNotFoundError:
3390 except FileNotFoundError:
3286 dd = 0
3391 dd = 0
3287
3392
3288 try:
3393 try:
3289 f = self.opener(self._indexfile)
3394 f = self.opener(self._indexfile)
3290 f.seek(0, io.SEEK_END)
3395 f.seek(0, io.SEEK_END)
3291 actual = f.tell()
3396 actual = f.tell()
3292 f.close()
3397 f.close()
3293 s = self.index.entry_size
3398 s = self.index.entry_size
3294 i = max(0, actual // s)
3399 i = max(0, actual // s)
3295 di = actual - (i * s)
3400 di = actual - (i * s)
3296 if self._inline:
3401 if self._inline:
3297 databytes = 0
3402 databytes = 0
3298 for r in self:
3403 for r in self:
3299 databytes += max(0, self.length(r))
3404 databytes += max(0, self.length(r))
3300 dd = 0
3405 dd = 0
3301 di = actual - len(self) * s - databytes
3406 di = actual - len(self) * s - databytes
3302 except FileNotFoundError:
3407 except FileNotFoundError:
3303 di = 0
3408 di = 0
3304
3409
3305 return (dd, di)
3410 return (dd, di)
3306
3411
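A toy rendering of the (dd, di) computation above for a non-inline revlog (sizes are made up): dd is surplus data-file bytes, di is whatever trails the last whole index entry:

entry_size = 64
expected_data = 4096                    # end of the last revision
actual_data = 4096                      # data file size on disk
actual_index = 10 * entry_size + 3      # 3 stray trailing bytes

dd = actual_data - expected_data
di = actual_index - (actual_index // entry_size) * entry_size
assert (dd, di) == (0, 3)               # a healthy revlog gives (0, 0)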
3307 def files(self):
3412 def files(self):
3308 """return list of files that compose this revlog"""
3413 """return list of files that compose this revlog"""
3309 res = [self._indexfile]
3414 res = [self._indexfile]
3310 if self._docket_file is None:
3415 if self._docket_file is None:
3311 if not self._inline:
3416 if not self._inline:
3312 res.append(self._datafile)
3417 res.append(self._datafile)
3313 else:
3418 else:
3314 res.append(self._docket_file)
3419 res.append(self._docket_file)
3315 res.extend(self._docket.old_index_filepaths(include_empty=False))
3420 res.extend(self._docket.old_index_filepaths(include_empty=False))
3316 if self._docket.data_end:
3421 if self._docket.data_end:
3317 res.append(self._datafile)
3422 res.append(self._datafile)
3318 res.extend(self._docket.old_data_filepaths(include_empty=False))
3423 res.extend(self._docket.old_data_filepaths(include_empty=False))
3319 if self._docket.sidedata_end:
3424 if self._docket.sidedata_end:
3320 res.append(self._sidedatafile)
3425 res.append(self._sidedatafile)
3321 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3426 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3322 return res
3427 return res
3323
3428
3324 def emitrevisions(
3429 def emitrevisions(
3325 self,
3430 self,
3326 nodes,
3431 nodes,
3327 nodesorder=None,
3432 nodesorder=None,
3328 revisiondata=False,
3433 revisiondata=False,
3329 assumehaveparentrevisions=False,
3434 assumehaveparentrevisions=False,
3330 deltamode=repository.CG_DELTAMODE_STD,
3435 deltamode=repository.CG_DELTAMODE_STD,
3331 sidedata_helpers=None,
3436 sidedata_helpers=None,
3332 debug_info=None,
3437 debug_info=None,
3333 ):
3438 ):
3334 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3439 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3335 raise error.ProgrammingError(
3440 raise error.ProgrammingError(
3336 b'unhandled value for nodesorder: %s' % nodesorder
3441 b'unhandled value for nodesorder: %s' % nodesorder
3337 )
3442 )
3338
3443
3339 if nodesorder is None and not self.delta_config.general_delta:
3444 if nodesorder is None and not self.delta_config.general_delta:
3340 nodesorder = b'storage'
3445 nodesorder = b'storage'
3341
3446
3342 if (
3447 if (
3343 not self._storedeltachains
3448 not self._storedeltachains
3344 and deltamode != repository.CG_DELTAMODE_PREV
3449 and deltamode != repository.CG_DELTAMODE_PREV
3345 ):
3450 ):
3346 deltamode = repository.CG_DELTAMODE_FULL
3451 deltamode = repository.CG_DELTAMODE_FULL
3347
3452
3348 return storageutil.emitrevisions(
3453 return storageutil.emitrevisions(
3349 self,
3454 self,
3350 nodes,
3455 nodes,
3351 nodesorder,
3456 nodesorder,
3352 revlogrevisiondelta,
3457 revlogrevisiondelta,
3353 deltaparentfn=self.deltaparent,
3458 deltaparentfn=self.deltaparent,
3354 candeltafn=self._candelta,
3459 candeltafn=self._candelta,
3355 rawsizefn=self.rawsize,
3460 rawsizefn=self.rawsize,
3356 revdifffn=self.revdiff,
3461 revdifffn=self.revdiff,
3357 flagsfn=self.flags,
3462 flagsfn=self.flags,
3358 deltamode=deltamode,
3463 deltamode=deltamode,
3359 revisiondata=revisiondata,
3464 revisiondata=revisiondata,
3360 assumehaveparentrevisions=assumehaveparentrevisions,
3465 assumehaveparentrevisions=assumehaveparentrevisions,
3361 sidedata_helpers=sidedata_helpers,
3466 sidedata_helpers=sidedata_helpers,
3362 debug_info=debug_info,
3467 debug_info=debug_info,
3363 )
3468 )
3364
3469
3365 DELTAREUSEALWAYS = b'always'
3470 DELTAREUSEALWAYS = b'always'
3366 DELTAREUSESAMEREVS = b'samerevs'
3471 DELTAREUSESAMEREVS = b'samerevs'
3367 DELTAREUSENEVER = b'never'
3472 DELTAREUSENEVER = b'never'
3368
3473
3369 DELTAREUSEFULLADD = b'fulladd'
3474 DELTAREUSEFULLADD = b'fulladd'
3370
3475
3371 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3476 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3372
3477
3373 def clone(
3478 def clone(
3374 self,
3479 self,
3375 tr,
3480 tr,
3376 destrevlog,
3481 destrevlog,
3377 addrevisioncb=None,
3482 addrevisioncb=None,
3378 deltareuse=DELTAREUSESAMEREVS,
3483 deltareuse=DELTAREUSESAMEREVS,
3379 forcedeltabothparents=None,
3484 forcedeltabothparents=None,
3380 sidedata_helpers=None,
3485 sidedata_helpers=None,
3381 ):
3486 ):
3382 """Copy this revlog to another, possibly with format changes.
3487 """Copy this revlog to another, possibly with format changes.
3383
3488
3384 The destination revlog will contain the same revisions and nodes.
3489 The destination revlog will contain the same revisions and nodes.
3385 However, it may not be bit-for-bit identical due to e.g. delta encoding
3490 However, it may not be bit-for-bit identical due to e.g. delta encoding
3386 differences.
3491 differences.
3387
3492
3388 The ``deltareuse`` argument controls how deltas from the existing revlog
3493 The ``deltareuse`` argument controls how deltas from the existing revlog
3389 are preserved in the destination revlog. The argument can have the
3494 are preserved in the destination revlog. The argument can have the
3390 following values:
3495 following values:
3391
3496
3392 DELTAREUSEALWAYS
3497 DELTAREUSEALWAYS
3393 Deltas will always be reused (if possible), even if the destination
3498 Deltas will always be reused (if possible), even if the destination
3394 revlog would not select the same revisions for the delta. This is the
3499 revlog would not select the same revisions for the delta. This is the
3395 fastest mode of operation.
3500 fastest mode of operation.
3396 DELTAREUSESAMEREVS
3501 DELTAREUSESAMEREVS
3397 Deltas will be reused if the destination revlog would pick the same
3502 Deltas will be reused if the destination revlog would pick the same
3398 revisions for the delta. This mode strikes a balance between speed
3503 revisions for the delta. This mode strikes a balance between speed
3399 and optimization.
3504 and optimization.
3400 DELTAREUSENEVER
3505 DELTAREUSENEVER
3401 Deltas will never be reused. This is the slowest mode of execution.
3506 Deltas will never be reused. This is the slowest mode of execution.
3402 This mode can be used to recompute deltas (e.g. if the diff/delta
3507 This mode can be used to recompute deltas (e.g. if the diff/delta
3403 algorithm changes).
3508 algorithm changes).
3404 DELTAREUSEFULLADD
3509 DELTAREUSEFULLADD
3405 Revisions will be re-added as if they were new content. This is
3510 Revisions will be re-added as if they were new content. This is
3406 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3511 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3407 e.g. large file detection and handling.
3512 e.g. large file detection and handling.
3408
3513
3409 Delta computation can be slow, so the choice of delta reuse policy can
3514 Delta computation can be slow, so the choice of delta reuse policy can
3410 significantly affect run time.
3515 significantly affect run time.
3411
3516
3412 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3517 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3413 two extremes. Deltas will be reused if they are appropriate. But if the
3518 two extremes. Deltas will be reused if they are appropriate. But if the
3414 delta could choose a better revision, it will do so. This means if you
3519 delta could choose a better revision, it will do so. This means if you
3415 are converting a non-generaldelta revlog to a generaldelta revlog,
3520 are converting a non-generaldelta revlog to a generaldelta revlog,
3416 deltas will be recomputed if the delta's parent isn't a parent of the
3521 deltas will be recomputed if the delta's parent isn't a parent of the
3417 revision.
3522 revision.
3418
3523
3419 In addition to the delta policy, the ``forcedeltabothparents``
3524 In addition to the delta policy, the ``forcedeltabothparents``
3420 argument controls whether to force compute deltas against both parents
3525 argument controls whether to force compute deltas against both parents
3421 for merges. When unset, the destination revlog's existing setting is used.
3526 for merges. When unset, the destination revlog's existing setting is used.
3422
3527
3423 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3528 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3424 `sidedata_helpers`.
3529 `sidedata_helpers`.
3425 """
3530 """
3426 if deltareuse not in self.DELTAREUSEALL:
3531 if deltareuse not in self.DELTAREUSEALL:
3427 raise ValueError(
3532 raise ValueError(
3428 _(b'value for deltareuse invalid: %s') % deltareuse
3533 _(b'value for deltareuse invalid: %s') % deltareuse
3429 )
3534 )
3430
3535
3431 if len(destrevlog):
3536 if len(destrevlog):
3432 raise ValueError(_(b'destination revlog is not empty'))
3537 raise ValueError(_(b'destination revlog is not empty'))
3433
3538
3434 if getattr(self, 'filteredrevs', None):
3539 if getattr(self, 'filteredrevs', None):
3435 raise ValueError(_(b'source revlog has filtered revisions'))
3540 raise ValueError(_(b'source revlog has filtered revisions'))
3436 if getattr(destrevlog, 'filteredrevs', None):
3541 if getattr(destrevlog, 'filteredrevs', None):
3437 raise ValueError(_(b'destination revlog has filtered revisions'))
3542 raise ValueError(_(b'destination revlog has filtered revisions'))
3438
3543
3439 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3544 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3440 # if possible.
3545 # if possible.
3441 old_delta_config = destrevlog.delta_config
3546 old_delta_config = destrevlog.delta_config
3442 destrevlog.delta_config = destrevlog.delta_config.copy()
3547 destrevlog.delta_config = destrevlog.delta_config.copy()
3443
3548
3444 try:
3549 try:
3445 if deltareuse == self.DELTAREUSEALWAYS:
3550 if deltareuse == self.DELTAREUSEALWAYS:
3446 destrevlog.delta_config.lazy_delta_base = True
3551 destrevlog.delta_config.lazy_delta_base = True
3447 destrevlog.delta_config.lazy_delta = True
3552 destrevlog.delta_config.lazy_delta = True
3448 elif deltareuse == self.DELTAREUSESAMEREVS:
3553 elif deltareuse == self.DELTAREUSESAMEREVS:
3449 destrevlog.delta_config.lazy_delta_base = False
3554 destrevlog.delta_config.lazy_delta_base = False
3450 destrevlog.delta_config.lazy_delta = True
3555 destrevlog.delta_config.lazy_delta = True
3451 elif deltareuse == self.DELTAREUSENEVER:
3556 elif deltareuse == self.DELTAREUSENEVER:
3452 destrevlog.delta_config.lazy_delta_base = False
3557 destrevlog.delta_config.lazy_delta_base = False
3453 destrevlog.delta_config.lazy_delta = False
3558 destrevlog.delta_config.lazy_delta = False
3454
3559
3455 delta_both_parents = (
3560 delta_both_parents = (
3456 forcedeltabothparents or old_delta_config.delta_both_parents
3561 forcedeltabothparents or old_delta_config.delta_both_parents
3457 )
3562 )
3458 destrevlog.delta_config.delta_both_parents = delta_both_parents
3563 destrevlog.delta_config.delta_both_parents = delta_both_parents
3459
3564
3460 with self.reading(), destrevlog._writing(tr):
3565 with self.reading(), destrevlog._writing(tr):
3461 self._clone(
3566 self._clone(
3462 tr,
3567 tr,
3463 destrevlog,
3568 destrevlog,
3464 addrevisioncb,
3569 addrevisioncb,
3465 deltareuse,
3570 deltareuse,
3466 forcedeltabothparents,
3571 forcedeltabothparents,
3467 sidedata_helpers,
3572 sidedata_helpers,
3468 )
3573 )
3469
3574
3470 finally:
3575 finally:
3471 destrevlog.delta_config = old_delta_config
3576 destrevlog.delta_config = old_delta_config
3472
3577
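The three reuse policies map onto the two lazy-delta switches set in the try-block above; summarized as data (the dict literal is illustrative, its keys mirror the DELTAREUSE* constants):

POLICY_FLAGS = {
    b'always':   dict(lazy_delta=True,  lazy_delta_base=True),
    b'samerevs': dict(lazy_delta=True,  lazy_delta_base=False),
    b'never':    dict(lazy_delta=False, lazy_delta_base=False),
    # b'fulladd' skips delta reuse entirely and goes through addrevision()
}
assert POLICY_FLAGS[b'samerevs'] == dict(lazy_delta=True,
                                         lazy_delta_base=False)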
3473 def _clone(
3578 def _clone(
3474 self,
3579 self,
3475 tr,
3580 tr,
3476 destrevlog,
3581 destrevlog,
3477 addrevisioncb,
3582 addrevisioncb,
3478 deltareuse,
3583 deltareuse,
3479 forcedeltabothparents,
3584 forcedeltabothparents,
3480 sidedata_helpers,
3585 sidedata_helpers,
3481 ):
3586 ):
3482 """perform the core duty of `revlog.clone` after parameter processing"""
3587 """perform the core duty of `revlog.clone` after parameter processing"""
3483 write_debug = None
3588 write_debug = None
3484 if self.delta_config.debug_delta:
3589 if self.delta_config.debug_delta:
3485 write_debug = tr._report
3590 write_debug = tr._report
3486 deltacomputer = deltautil.deltacomputer(
3591 deltacomputer = deltautil.deltacomputer(
3487 destrevlog,
3592 destrevlog,
3488 write_debug=write_debug,
3593 write_debug=write_debug,
3489 )
3594 )
3490 index = self.index
3595 index = self.index
3491 for rev in self:
3596 for rev in self:
3492 entry = index[rev]
3597 entry = index[rev]
3493
3598
3494 # Some classes override linkrev to take filtered revs into
3599 # Some classes override linkrev to take filtered revs into
3495 # account. Use raw entry from index.
3600 # account. Use raw entry from index.
3496 flags = entry[0] & 0xFFFF
3601 flags = entry[0] & 0xFFFF
3497 linkrev = entry[4]
3602 linkrev = entry[4]
3498 p1 = index[entry[5]][7]
3603 p1 = index[entry[5]][7]
3499 p2 = index[entry[6]][7]
3604 p2 = index[entry[6]][7]
3500 node = entry[7]
3605 node = entry[7]
3501
3606
3502 # (Possibly) reuse the delta from the revlog if allowed and
3607 # (Possibly) reuse the delta from the revlog if allowed and
3503 # the revlog chunk is a delta.
3608 # the revlog chunk is a delta.
3504 cachedelta = None
3609 cachedelta = None
3505 rawtext = None
3610 rawtext = None
3506 if deltareuse == self.DELTAREUSEFULLADD:
3611 if deltareuse == self.DELTAREUSEFULLADD:
3507 text = self._revisiondata(rev)
3612 text = self._revisiondata(rev)
3508 sidedata = self.sidedata(rev)
3613 sidedata = self.sidedata(rev)
3509
3614
3510 if sidedata_helpers is not None:
3615 if sidedata_helpers is not None:
3511 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3616 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3512 self, sidedata_helpers, sidedata, rev
3617 self, sidedata_helpers, sidedata, rev
3513 )
3618 )
3514 flags = flags | new_flags[0] & ~new_flags[1]
3619 flags = flags | new_flags[0] & ~new_flags[1]
3515
3620
3516 destrevlog.addrevision(
3621 destrevlog.addrevision(
3517 text,
3622 text,
3518 tr,
3623 tr,
3519 linkrev,
3624 linkrev,
3520 p1,
3625 p1,
3521 p2,
3626 p2,
3522 cachedelta=cachedelta,
3627 cachedelta=cachedelta,
3523 node=node,
3628 node=node,
3524 flags=flags,
3629 flags=flags,
3525 deltacomputer=deltacomputer,
3630 deltacomputer=deltacomputer,
3526 sidedata=sidedata,
3631 sidedata=sidedata,
3527 )
3632 )
3528 else:
3633 else:
3529 if destrevlog.delta_config.lazy_delta:
3634 if destrevlog.delta_config.lazy_delta:
3530 dp = self.deltaparent(rev)
3635 dp = self.deltaparent(rev)
3531 if dp != nullrev:
3636 if dp != nullrev:
3532 cachedelta = (dp, bytes(self._chunk(rev)))
3637 cachedelta = (dp, bytes(self._chunk(rev)))
3533
3638
3534 sidedata = None
3639 sidedata = None
3535 if not cachedelta:
3640 if not cachedelta:
3536 try:
3641 try:
3537 rawtext = self._revisiondata(rev)
3642 rawtext = self._revisiondata(rev)
3538 except error.CensoredNodeError as censored:
3643 except error.CensoredNodeError as censored:
3539 assert flags & REVIDX_ISCENSORED
3644 assert flags & REVIDX_ISCENSORED
3540 rawtext = censored.tombstone
3645 rawtext = censored.tombstone
3541 sidedata = self.sidedata(rev)
3646 sidedata = self.sidedata(rev)
3542 if sidedata is None:
3647 if sidedata is None:
3543 sidedata = self.sidedata(rev)
3648 sidedata = self.sidedata(rev)
3544
3649
3545 if sidedata_helpers is not None:
3650 if sidedata_helpers is not None:
3546 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3651 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3547 self, sidedata_helpers, sidedata, rev
3652 self, sidedata_helpers, sidedata, rev
3548 )
3653 )
3549 flags = flags | new_flags[0] & ~new_flags[1]
3654 flags = flags | new_flags[0] & ~new_flags[1]
3550
3655
3551 destrevlog._addrevision(
3656 destrevlog._addrevision(
3552 node,
3657 node,
3553 rawtext,
3658 rawtext,
3554 tr,
3659 tr,
3555 linkrev,
3660 linkrev,
3556 p1,
3661 p1,
3557 p2,
3662 p2,
3558 flags,
3663 flags,
3559 cachedelta,
3664 cachedelta,
3560 deltacomputer=deltacomputer,
3665 deltacomputer=deltacomputer,
3561 sidedata=sidedata,
3666 sidedata=sidedata,
3562 )
3667 )
3563
3668
3564 if addrevisioncb:
3669 if addrevisioncb:
3565 addrevisioncb(self, rev, node)
3670 addrevisioncb(self, rev, node)
3566
3671
3567 def censorrevision(self, tr, censornode, tombstone=b''):
3672 def censorrevision(self, tr, censornode, tombstone=b''):
3568 if self._format_version == REVLOGV0:
3673 if self._format_version == REVLOGV0:
3569 raise error.RevlogError(
3674 raise error.RevlogError(
3570 _(b'cannot censor with version %d revlogs')
3675 _(b'cannot censor with version %d revlogs')
3571 % self._format_version
3676 % self._format_version
3572 )
3677 )
3573 elif self._format_version == REVLOGV1:
3678 elif self._format_version == REVLOGV1:
3574 rewrite.v1_censor(self, tr, censornode, tombstone)
3679 rewrite.v1_censor(self, tr, censornode, tombstone)
3575 else:
3680 else:
3576 rewrite.v2_censor(self, tr, censornode, tombstone)
3681 rewrite.v2_censor(self, tr, censornode, tombstone)
3577
3682
3578 def verifyintegrity(self, state):
3683 def verifyintegrity(self, state):
3579 """Verifies the integrity of the revlog.
3684 """Verifies the integrity of the revlog.
3580
3685
3581 Yields ``revlogproblem`` instances describing problems that are
3686 Yields ``revlogproblem`` instances describing problems that are
3582 found.
3687 found.
3583 """
3688 """
3584 dd, di = self.checksize()
3689 dd, di = self.checksize()
3585 if dd:
3690 if dd:
3586 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3691 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3587 if di:
3692 if di:
3588 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3693 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3589
3694
3590 version = self._format_version
3695 version = self._format_version
3591
3696
3592 # The verifier tells us what version revlog we should be.
3697 # The verifier tells us what version revlog we should be.
3593 if version != state[b'expectedversion']:
3698 if version != state[b'expectedversion']:
3594 yield revlogproblem(
3699 yield revlogproblem(
3595 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3700 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3596 % (self.display_id, version, state[b'expectedversion'])
3701 % (self.display_id, version, state[b'expectedversion'])
3597 )
3702 )
3598
3703
3599 state[b'skipread'] = set()
3704 state[b'skipread'] = set()
3600 state[b'safe_renamed'] = set()
3705 state[b'safe_renamed'] = set()
3601
3706
3602 for rev in self:
3707 for rev in self:
3603 node = self.node(rev)
3708 node = self.node(rev)
3604
3709
3605 # Verify contents. 4 cases to care about:
3710 # Verify contents. 4 cases to care about:
3606 #
3711 #
3607 # common: the most common case
3712 # common: the most common case
3608 # rename: with a rename
3713 # rename: with a rename
3609 # meta: file content starts with b'\1\n', the metadata
3714 # meta: file content starts with b'\1\n', the metadata
3610 # header defined in filelog.py, but without a rename
3715 # header defined in filelog.py, but without a rename
3611 # ext: content stored externally
3716 # ext: content stored externally
3612 #
3717 #
3613 # More formally, their differences are shown below:
3718 # More formally, their differences are shown below:
3614 #
3719 #
3615 # | common | rename | meta | ext
3720 # | common | rename | meta | ext
3616 # -------------------------------------------------------
3721 # -------------------------------------------------------
3617 # flags() | 0 | 0 | 0 | not 0
3722 # flags() | 0 | 0 | 0 | not 0
3618 # renamed() | False | True | False | ?
3723 # renamed() | False | True | False | ?
3619 # rawtext[0:2]=='\1\n'| False | True | True | ?
3724 # rawtext[0:2]=='\1\n'| False | True | True | ?
3620 #
3725 #
3621 # "rawtext" means the raw text stored in revlog data, which
3726 # "rawtext" means the raw text stored in revlog data, which
3622 # could be retrieved by "rawdata(rev)". "text"
3727 # could be retrieved by "rawdata(rev)". "text"
3623 # mentioned below is "revision(rev)".
3728 # mentioned below is "revision(rev)".
3624 #
3729 #
3625 # There are 3 different lengths stored physically:
3730 # There are 3 different lengths stored physically:
3626 # 1. L1: rawsize, stored in revlog index
3731 # 1. L1: rawsize, stored in revlog index
3627 # 2. L2: len(rawtext), stored in revlog data
3732 # 2. L2: len(rawtext), stored in revlog data
3628 # 3. L3: len(text), stored in revlog data if flags==0, or
3733 # 3. L3: len(text), stored in revlog data if flags==0, or
3629 # possibly somewhere else if flags!=0
3734 # possibly somewhere else if flags!=0
3630 #
3735 #
3631 # L1 should be equal to L2. L3 could be different from them.
3736 # L1 should be equal to L2. L3 could be different from them.
3632 # "text" may or may not affect commit hash depending on flag
3737 # "text" may or may not affect commit hash depending on flag
3633 # processors (see flagutil.addflagprocessor).
3738 # processors (see flagutil.addflagprocessor).
3634 #
3739 #
3635 # | common | rename | meta | ext
3740 # | common | rename | meta | ext
3636 # -------------------------------------------------
3741 # -------------------------------------------------
3637 # rawsize() | L1 | L1 | L1 | L1
3742 # rawsize() | L1 | L1 | L1 | L1
3638 # size() | L1 | L2-LM | L1(*) | L1 (?)
3743 # size() | L1 | L2-LM | L1(*) | L1 (?)
3639 # len(rawtext) | L2 | L2 | L2 | L2
3744 # len(rawtext) | L2 | L2 | L2 | L2
3640 # len(text) | L2 | L2 | L2 | L3
3745 # len(text) | L2 | L2 | L2 | L3
3641 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3746 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3642 #
3747 #
3643 # LM: length of metadata, depending on rawtext
3748 # LM: length of metadata, depending on rawtext
3644 # (*): not ideal, see comment in filelog.size
3749 # (*): not ideal, see comment in filelog.size
3645 # (?): could be "- len(meta)" if the resolved content has
3750 # (?): could be "- len(meta)" if the resolved content has
3646 # rename metadata
3751 # rename metadata
3647 #
3752 #
3648 # Checks needed to be done:
3753 # Checks needed to be done:
3649 # 1. length check: L1 == L2, in all cases.
3754 # 1. length check: L1 == L2, in all cases.
3650 # 2. hash check: depending on flag processor, we may need to
3755 # 2. hash check: depending on flag processor, we may need to
3651 # use either "text" (external), or "rawtext" (in revlog).
3756 # use either "text" (external), or "rawtext" (in revlog).
3652
3757
3653 try:
3758 try:
3654 skipflags = state.get(b'skipflags', 0)
3759 skipflags = state.get(b'skipflags', 0)
3655 if skipflags:
3760 if skipflags:
3656 skipflags &= self.flags(rev)
3761 skipflags &= self.flags(rev)
3657
3762
3658 _verify_revision(self, skipflags, state, node)
3763 _verify_revision(self, skipflags, state, node)
3659
3764
3660 l1 = self.rawsize(rev)
3765 l1 = self.rawsize(rev)
3661 l2 = len(self.rawdata(node))
3766 l2 = len(self.rawdata(node))
3662
3767
3663 if l1 != l2:
3768 if l1 != l2:
3664 yield revlogproblem(
3769 yield revlogproblem(
3665 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3770 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3666 node=node,
3771 node=node,
3667 )
3772 )
3668
3773
3669 except error.CensoredNodeError:
3774 except error.CensoredNodeError:
3670 if state[b'erroroncensored']:
3775 if state[b'erroroncensored']:
3671 yield revlogproblem(
3776 yield revlogproblem(
3672 error=_(b'censored file data'), node=node
3777 error=_(b'censored file data'), node=node
3673 )
3778 )
3674 state[b'skipread'].add(node)
3779 state[b'skipread'].add(node)
3675 except Exception as e:
3780 except Exception as e:
3676 yield revlogproblem(
3781 yield revlogproblem(
3677 error=_(b'unpacking %s: %s')
3782 error=_(b'unpacking %s: %s')
3678 % (short(node), stringutil.forcebytestr(e)),
3783 % (short(node), stringutil.forcebytestr(e)),
3679 node=node,
3784 node=node,
3680 )
3785 )
3681 state[b'skipread'].add(node)
3786 state[b'skipread'].add(node)
3682
3787
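The core per-revision length check described in the long comment above (L1 == L2) can be modelled standalone; a minimal sketch with toy revisions:

def length_problems(revisions):
    # each item: (node, l1_rawsize_from_index, stored_rawtext)
    for node, l1, rawtext in revisions:
        l2 = len(rawtext)
        if l1 != l2:
            yield (node, b'unpacked size is %d, %d expected' % (l2, l1))

probs = list(length_problems([(b'n1', 4, b'abcd'), (b'n2', 9, b'abc')]))
assert probs == [(b'n2', b'unpacked size is 3, 9 expected')]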
3683 def storageinfo(
3788 def storageinfo(
3684 self,
3789 self,
3685 exclusivefiles=False,
3790 exclusivefiles=False,
3686 sharedfiles=False,
3791 sharedfiles=False,
3687 revisionscount=False,
3792 revisionscount=False,
3688 trackedsize=False,
3793 trackedsize=False,
3689 storedsize=False,
3794 storedsize=False,
3690 ):
3795 ):
3691 d = {}
3796 d = {}
3692
3797
3693 if exclusivefiles:
3798 if exclusivefiles:
3694 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3799 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3695 if not self._inline:
3800 if not self._inline:
3696 d[b'exclusivefiles'].append((self.opener, self._datafile))
3801 d[b'exclusivefiles'].append((self.opener, self._datafile))
3697
3802
3698 if sharedfiles:
3803 if sharedfiles:
3699 d[b'sharedfiles'] = []
3804 d[b'sharedfiles'] = []
3700
3805
3701 if revisionscount:
3806 if revisionscount:
3702 d[b'revisionscount'] = len(self)
3807 d[b'revisionscount'] = len(self)
3703
3808
3704 if trackedsize:
3809 if trackedsize:
3705 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3810 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3706
3811
3707 if storedsize:
3812 if storedsize:
3708 d[b'storedsize'] = sum(
3813 d[b'storedsize'] = sum(
3709 self.opener.stat(path).st_size for path in self.files()
3814 self.opener.stat(path).st_size for path in self.files()
3710 )
3815 )
3711
3816
3712 return d
3817 return d
3713
3818
3714 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3819 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3715 if not self.feature_config.has_side_data:
3820 if not self.feature_config.has_side_data:
3716 return
3821 return
3717 # revlog formats with sidedata support do not support inline
3822 # revlog formats with sidedata support do not support inline
3718 assert not self._inline
3823 assert not self._inline
3719 if not helpers[1] and not helpers[2]:
3824 if not helpers[1] and not helpers[2]:
3720 # Nothing to generate or remove
3825 # Nothing to generate or remove
3721 return
3826 return
3722
3827
3723 new_entries = []
3828 new_entries = []
3724 # append the new sidedata
3829 # append the new sidedata
3725 with self._writing(transaction):
3830 with self._writing(transaction):
3726 ifh, dfh, sdfh = self._writinghandles
3831 ifh, dfh, sdfh = self._inner._writinghandles
3727 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3832 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3728
3833
3729 current_offset = sdfh.tell()
3834 current_offset = sdfh.tell()
3730 for rev in range(startrev, endrev + 1):
3835 for rev in range(startrev, endrev + 1):
3731 entry = self.index[rev]
3836 entry = self.index[rev]
3732 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3837 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3733 store=self,
3838 store=self,
3734 sidedata_helpers=helpers,
3839 sidedata_helpers=helpers,
3735 sidedata={},
3840 sidedata={},
3736 rev=rev,
3841 rev=rev,
3737 )
3842 )
3738
3843
3739 serialized_sidedata = sidedatautil.serialize_sidedata(
3844 serialized_sidedata = sidedatautil.serialize_sidedata(
3740 new_sidedata
3845 new_sidedata
3741 )
3846 )
3742
3847
3743 sidedata_compression_mode = COMP_MODE_INLINE
3848 sidedata_compression_mode = COMP_MODE_INLINE
3744 if serialized_sidedata and self.feature_config.has_side_data:
3849 if serialized_sidedata and self.feature_config.has_side_data:
3745 sidedata_compression_mode = COMP_MODE_PLAIN
3850 sidedata_compression_mode = COMP_MODE_PLAIN
3746 h, comp_sidedata = self.compress(serialized_sidedata)
3851 h, comp_sidedata = self.compress(serialized_sidedata)
3747 if (
3852 if (
3748 h != b'u'
3853 h != b'u'
3749 and comp_sidedata[0] != b'\0'
3854 and comp_sidedata[0] != b'\0'
3750 and len(comp_sidedata) < len(serialized_sidedata)
3855 and len(comp_sidedata) < len(serialized_sidedata)
3751 ):
3856 ):
3752 assert not h
3857 assert not h
3753 if (
3858 if (
3754 comp_sidedata[0]
3859 comp_sidedata[0]
3755 == self._docket.default_compression_header
3860 == self._docket.default_compression_header
3756 ):
3861 ):
3757 sidedata_compression_mode = COMP_MODE_DEFAULT
3862 sidedata_compression_mode = COMP_MODE_DEFAULT
3758 serialized_sidedata = comp_sidedata
3863 serialized_sidedata = comp_sidedata
3759 else:
3864 else:
3760 sidedata_compression_mode = COMP_MODE_INLINE
3865 sidedata_compression_mode = COMP_MODE_INLINE
3761 serialized_sidedata = comp_sidedata
3866 serialized_sidedata = comp_sidedata
3762 if entry[8] != 0 or entry[9] != 0:
3867 if entry[8] != 0 or entry[9] != 0:
3763 # rewriting entries that already have sidedata is not
3868 # rewriting entries that already have sidedata is not
3764 # supported yet, because it introduces garbage data in the
3869 # supported yet, because it introduces garbage data in the
3765 # revlog.
3870 # revlog.
3766 msg = b"rewriting existing sidedata is not supported yet"
3871 msg = b"rewriting existing sidedata is not supported yet"
3767 raise error.Abort(msg)
3872 raise error.Abort(msg)
3768
3873
3769 # Apply (potential) flags to add and to remove after running
3874 # Apply (potential) flags to add and to remove after running
3770 # the sidedata helpers
3875 # the sidedata helpers
3771 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3876 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3772 entry_update = (
3877 entry_update = (
3773 current_offset,
3878 current_offset,
3774 len(serialized_sidedata),
3879 len(serialized_sidedata),
3775 new_offset_flags,
3880 new_offset_flags,
3776 sidedata_compression_mode,
3881 sidedata_compression_mode,
3777 )
3882 )
3778
3883
3779 # the sidedata computation might have moved the file cursors around
3884 # the sidedata computation might have moved the file cursors around
3780 sdfh.seek(current_offset, os.SEEK_SET)
3885 sdfh.seek(current_offset, os.SEEK_SET)
3781 sdfh.write(serialized_sidedata)
3886 sdfh.write(serialized_sidedata)
3782 new_entries.append(entry_update)
3887 new_entries.append(entry_update)
3783 current_offset += len(serialized_sidedata)
3888 current_offset += len(serialized_sidedata)
3784 self._docket.sidedata_end = sdfh.tell()
3889 self._docket.sidedata_end = sdfh.tell()
3785
3890
3786 # rewrite the new index entries
3891 # rewrite the new index entries
3787 ifh.seek(startrev * self.index.entry_size)
3892 ifh.seek(startrev * self.index.entry_size)
3788 for i, e in enumerate(new_entries):
3893 for i, e in enumerate(new_entries):
3789 rev = startrev + i
3894 rev = startrev + i
3790 self.index.replace_sidedata_info(rev, *e)
3895 self.index.replace_sidedata_info(rev, *e)
3791 packed = self.index.entry_binary(rev)
3896 packed = self.index.entry_binary(rev)
3792 if rev == 0 and self._docket is None:
3897 if rev == 0 and self._docket is None:
3793 header = self._format_flags | self._format_version
3898 header = self._format_flags | self._format_version
3794 header = self.index.pack_header(header)
3899 header = self.index.pack_header(header)
3795 packed = header + packed
3900 packed = header + packed
3796 ifh.write(packed)
3901 ifh.write(packed)
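The compression-mode choice inside the rewrite loop can be read as a standalone decision function; a hedged sketch, where the numeric constants are illustrative stand-ins for the revlog COMP_MODE_* values:

COMP_MODE_PLAIN, COMP_MODE_DEFAULT, COMP_MODE_INLINE = 0, 1, 2

def pick_sidedata_mode(h, comp, raw, default_header):
    # start from plain storage of the serialized sidedata
    mode, out = COMP_MODE_PLAIN, raw
    if h != b'u' and comp[0:1] != b'\0' and len(comp) < len(raw):
        if comp[0:1] == default_header:
            # docket's default compression: header can be left implicit
            mode, out = COMP_MODE_DEFAULT, comp
        else:
            # some other engine: keep its header inline with the data
            mode, out = COMP_MODE_INLINE, comp
    return mode, out

# compressed with the docket's default engine and actually smaller:
assert pick_sidedata_mode(b'', b'\x28xx', b'x' * 10, b'\x28')[0] \
    == COMP_MODE_DEFAULT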