changelog-delay: adds some check around delaying and diverting write...
marmoute
r51995:594f9128 default
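The revision adds an `assert not self._inner.is_open` guard at the top of `delayupdate`, `_finalize`, and `_writepending` in changelog.py: each of those methods swaps the changelog's opener, which is only safe while no file handle obtained through the previous opener is still open. A minimal sketch of the pattern, using hypothetical stand-in names rather than Mercurial's real classes:

class _Inner:
    """Stand-in for the revlog inner layer; tracks active file handles."""

    def __init__(self):
        self.is_open = False  # True while write handles are in use


class _Log:
    """Stand-in changelog that swaps openers to delay or divert writes."""

    def __init__(self, opener):
        self._inner = _Inner()
        self.opener = opener

    def delayupdate(self, delayed_opener):
        # The new check: swapping the opener while a handle is still open
        # would leave that handle writing to the old target.
        assert not self._inner.is_open
        self.opener = delayed_opener

In the diff below, the lines this revision adds are marked with `+`.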
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,641 +1,644 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


from .i18n import _
from .node import (
    bin,
    hex,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import (
    constants as revlog_constants,
    flagutil,
)

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender:
    """the changelog index must be updated last on disk, so we use this class
    to delay writes to it"""

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)


class _divertopener:
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


class _delayopener:
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def __init__(self, opener, target, buf):
        self._opener = opener
        self._target = target
        self._buf = buf

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        assert not kwargs
        return appender(self._opener, name, mode, self._buf)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


@attr.s
class _changelogrevision:
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib()
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')
    branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))


class changelogrevision:
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, cl, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])

    @property
    def branchinfo(self):
        extra = self.extra
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False, concurrencychecker=None):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.

        ``concurrencychecker`` will be passed to the revlog init function, see
        the documentation there.
        """
        revlog.revlog.__init__(
            self,
            opener,
            target=(revlog_constants.KIND_CHANGELOG, None),
            radix=b'00changelog',
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
            concurrencychecker=concurrencychecker,
            trypending=trypending,
        )

        if self._initempty and (self._format_version == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self._format_flags &= ~revlog.FLAG_GENERALDELTA
            self.delta_config.general_delta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable delta
        # chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def _write_docket(self, tr):
        if not self._delayed:
            super(changelog, self)._write_docket(tr)

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""
+        assert not self._inner.is_open
        if self._docket is None and not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self._indexfile + b'.a'):
                    self._realopener.unlink(self._indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self._indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self._indexfile, self._delaybuf
                )
            self._inner.opener = self.opener
            self._inner._segmentfile.opener = self.opener
            self._inner._segmentfile_sidedata.opener = self.opener
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
+        assert not self._inner.is_open
        self._delayed = False
        self.opener = self._realopener
        self._inner.opener = self.opener
        self._inner._segmentfile.opener = self.opener
        self._inner._segmentfile_sidedata.opener = self.opener
        # move redirected index data back into place
        if self._docket is not None:
            self._write_docket(tr)
        elif self._divert:
            assert not self._delaybuf
            tmpname = self._indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self._indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self._indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr, side_write=False)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
+        assert not self._inner.is_open
        if self._docket:
            return self._docket.write(tr, pending=True)
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self._indexfile)
            pendingfilename = self._indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self._indexfile)
            self._inner.opener = self.opener
            self._inner._segmentfile.opener = self.opener
            self._inner._segmentfile_sidedata.opener = self.opener

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, side_write=True):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)

    def read(self, nodeorrev):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        copy_sd = self._copiesstorage == b'changeset-sidedata'
        c = changelogrevision(self, d, sidedata, copy_sd)
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text = self._revisiondata(nodeorrev)
        sidedata = self.sidedata(nodeorrev)
        return changelogrevision(
            self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, nodeorrev):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(nodeorrev)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            if files.has_copies_info:
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        rev = self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )
        return self.node(rev)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        return self.changelogrevision(rev).branchinfo

    def _nodeduplicatecallback(self, transaction, rev):
        # keep track of revisions that got "re-added", eg: unbundle of known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(rev)
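
Together, `delayupdate`, `_writepending`, and `_finalize` implement the two strategies the new assertions protect: either buffer pending index chunks in memory (`_delayopener`/`appender`) or divert them to a `00changelog.i.a` sibling file (`_divertopener`). A condensed sketch of the finalize step under those two modes (a hypothetical helper, simplified from the method above; the real code also restores the openers and handles the docket case):

import os


def finalize_pending(index_path, delaybuf=None, diverted=False):
    # Divert mode: everything was already written to ``index_path + '.a'``,
    # so an atomic rename publishes it to readers in one step.
    if diverted:
        os.rename(index_path + '.a', index_path)
    # Delay mode: pending chunks were buffered in memory and are appended
    # only now, so other readers never see a half-written index.
    elif delaybuf:
        with open(index_path, 'ab') as fp:
            fp.write(b''.join(delaybuf))

Either way, the index file only becomes visible to other readers once the transaction finalizes, which is the whole point of delaying the changelog index write.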
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,4042 +1,4049 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""


import binascii
import collections
import contextlib
import io
import os
import struct
import weakref
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    DELTA_BASE_REUSE_NO,
    DELTA_BASE_REUSE_TRY,
    ENTRY_RANK,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    KIND_FILELOG,
    RANK_UNKNOWN,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    revlogutils,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    randomaccessfile,
    revlogv0,
    rewrite,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanket usage of all the names to prevent pyflakes complaints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
CHANGELOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of inline data embedded into a revlog
_maxinline = 131072


# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance consideration (and a
# wheelbarrow of other slowness source)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
    parsers, 'BaseIndexObject'
)


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta:
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem:
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
    return index, cache


if hasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)

hexdigits = b'0123456789abcdefABCDEF'


class _Config:
    def copy(self):
        return self.__class__(**self.__dict__)


@attr.s()
class FeatureConfig(_Config):
    """Hold configuration values about the available revlog features"""

    # the default compression engine
    compression_engine = attr.ib(default=b'zlib')
    # compression engines options
    compression_engine_options = attr.ib(default=attr.Factory(dict))

    # can we use censor on this revlog
    censorable = attr.ib(default=False)
    # does this revlog use the "side data" feature
    has_side_data = attr.ib(default=False)
    # might remove rank configuration once the computation has no impact
    compute_rank = attr.ib(default=False)
    # parent order is supposed to be semantically irrelevant, so we
    # normally resort parents to ensure that the first parent is non-null,
    # if there is a non-null parent at all.
    # filelog abuses the parent order as flag to mark some instances of
    # meta-encoded files, so allow it to disable this behavior.
    canonical_parent_order = attr.ib(default=False)
    # can ellipsis commit be used
    enable_ellipsis = attr.ib(default=False)

    def copy(self):
        new = super().copy()
        new.compression_engine_options = self.compression_engine_options.copy()
        return new


@attr.s()
class DataConfig(_Config):
    """Hold configuration value about how the revlog data are read"""

    # should we try to open the "pending" version of the revlog
    try_pending = attr.ib(default=False)
    # should we try to open the "splitted" version of the revlog
    try_split = attr.ib(default=False)
    # When True, indexfile should be opened with checkambig=True at writing,
    # to avoid file stat ambiguity.
    check_ambig = attr.ib(default=False)

    # If true, use mmap instead of reading to deal with large index
    mmap_large_index = attr.ib(default=False)
    # how much data is large
    mmap_index_threshold = attr.ib(default=None)
    # How much data to read and cache into the raw revlog data cache.
    chunk_cache_size = attr.ib(default=65536)

    # Allow sparse reading of the revlog data
    with_sparse_read = attr.ib(default=False)
    # minimal density of a sparse read chunk
    sr_density_threshold = attr.ib(default=0.50)
    # minimal size of data we skip when performing sparse read
    sr_min_gap_size = attr.ib(default=262144)

    # are delta encoded against arbitrary bases.
    generaldelta = attr.ib(default=False)


@attr.s()
class DeltaConfig(_Config):
    """Hold configuration value about how new delta are computed

    Some attributes are duplicated from DataConfig to help having each object
    self-contained.
315 """
315 """
316
316
317 # can delta be encoded against arbitrary bases.
317 # can delta be encoded against arbitrary bases.
318 general_delta = attr.ib(default=False)
318 general_delta = attr.ib(default=False)
319 # Allow sparse writing of the revlog data
319 # Allow sparse writing of the revlog data
320 sparse_revlog = attr.ib(default=False)
320 sparse_revlog = attr.ib(default=False)
321 # maximum length of a delta chain
321 # maximum length of a delta chain
322 max_chain_len = attr.ib(default=None)
322 max_chain_len = attr.ib(default=None)
323 # Maximum distance between delta chain base start and end
323 # Maximum distance between delta chain base start and end
324 max_deltachain_span = attr.ib(default=-1)
324 max_deltachain_span = attr.ib(default=-1)
325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
325 # If `upper_bound_comp` is not None, this is the expected maximal gain from
326 # compression for the data content.
326 # compression for the data content.
327 upper_bound_comp = attr.ib(default=None)
327 upper_bound_comp = attr.ib(default=None)
328 # Should we try a delta against both parent
328 # Should we try a delta against both parent
329 delta_both_parents = attr.ib(default=True)
329 delta_both_parents = attr.ib(default=True)
330 # Test delta base candidate group by chunk of this maximal size.
330 # Test delta base candidate group by chunk of this maximal size.
331 candidate_group_chunk_size = attr.ib(default=0)
331 candidate_group_chunk_size = attr.ib(default=0)
332 # Should we display debug information about delta computation
332 # Should we display debug information about delta computation
333 debug_delta = attr.ib(default=False)
333 debug_delta = attr.ib(default=False)
334 # trust incoming delta by default
334 # trust incoming delta by default
335 lazy_delta = attr.ib(default=True)
335 lazy_delta = attr.ib(default=True)
336 # trust the base of incoming delta by default
336 # trust the base of incoming delta by default
337 lazy_delta_base = attr.ib(default=False)
337 lazy_delta_base = attr.ib(default=False)


class _InnerRevlog:
    """An inner layer of the revlog object

    That layer exists to be able to delegate some operations to Rust; its
    boundaries are arbitrary and based on what we can delegate to Rust.
    """

    def __init__(
        self,
        opener,
        index,
        index_file,
        data_file,
        sidedata_file,
        inline,
        data_config,
        delta_config,
        feature_config,
        chunk_cache,
        default_compression_header,
    ):
        self.opener = opener
        self.index = index

        self.__index_file = index_file
        self.data_file = data_file
        self.sidedata_file = sidedata_file
        self.inline = inline
        self.data_config = data_config
        self.delta_config = delta_config
        self.feature_config = feature_config

        self._default_compression_header = default_compression_header

        # index

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._segmentfile = randomaccessfile.randomaccessfile(
            self.opener,
            (self.index_file if self.inline else self.data_file),
            self.data_config.chunk_cache_size,
            chunk_cache,
        )
        self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
            self.opener,
            self.sidedata_file,
            self.data_config.chunk_cache_size,
        )

        # revlog header -> revlog compressor
        self._decompressors = {}
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None

    @property
    def index_file(self):
        return self.__index_file

    @index_file.setter
    def index_file(self, new_index_file):
        self.__index_file = new_index_file
        if self.inline:
            self._segmentfile.filename = new_index_file

    def __len__(self):
        return len(self.index)

    def clear_cache(self):
        self._revisioncache = None
        self._segmentfile.clear_cache()
        self._segmentfile_sidedata.clear_cache()

    # Derived from index values.

    def start(self, rev):
        """the offset of the data chunk for this revision"""
        return int(self.index[rev][0] >> 16)

    def length(self, rev):
        """the length of the data chunk for this revision"""
        return self.index[rev][1]

    def end(self, rev):
        """the end of the data chunk for this revision"""
        return self.start(rev) + self.length(rev)

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self.delta_config.sparse_revlog:
            return self.deltaparent(rev) == nullrev
        elif hasattr(self.index, 'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        while self.length(p1) == 0:
            b = self.deltaparent(p1)
            if b == p1:
                break
            p1 = b
        p2 = entry[6]
        while self.length(p2) == 0:
            b = self.deltaparent(p2)
            if b == p2:
                break
            p2 = b
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)
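    # Illustrative sketch of the rule implemented above (revision numbers
    # are assumptions): in a sparse revlog, a revision whose delta base is
    # one of its parents is a regular delta, not a snapshot. If rev 12 has
    # parents (10, 11) but stores its delta against rev 5, it counts as a
    # snapshot provided rev 5 is itself (recursively) a snapshot.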

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        generaldelta = self.delta_config.general_delta
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
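    # Illustrative sketch (not part of the API): rebuilding a full text from
    # the chain returned above is essentially what ``raw_text`` does further
    # down, applying each chunk as a patch on top of the base snapshot:
    #
    #   chain, stopped = inner._deltachain(rev)
    #   text = bytes(inner._chunk(chain[0]))      # base of the chain
    #   for r in chain[1:]:
    #       text = mdiff.patch(text, inner._chunk(r))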

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self.feature_config.compression_engine]
        return engine.revlogcompressor(
            self.feature_config.compression_engine_options
        )

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._default_compression_header is None:
            return None
        t = self._default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(
                    self.feature_config.compression_engine_options
                )
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data
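    # Summary of the storage convention implemented by ``compress`` (drawn
    # from the code above, not an official spec): the returned tuple is
    # (header, payload). The header is either empty (the compressor payload
    # carries its own header byte, e.g. b'x' for zlib) or b'u' to mark data
    # stored uncompressed. Data whose first byte is NUL needs no b'u'
    # marker, because b'\0' cannot be mistaken for a compression header.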

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    @contextlib.contextmanager
    def reading(self):
        """Context manager that keeps data and sidedata files open for reading"""
        if len(self.index) == 0:
            yield  # nothing to be read
        else:
            with self._segmentfile.reading():
                with self._segmentfile_sidedata.reading():
                    yield
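    # Illustrative usage sketch (assumed caller code, not part of this
    # module): batch readers hold the context so the underlying handles stay
    # open across chunk accesses instead of reopening per revision:
    #
    #   with inner.reading():
    #       chunks = [inner._chunk(r) for r in revs]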

    @property
    def is_writing(self):
        """True if a writing context is open"""
        return self._writinghandles is not None

    @property
    def is_open(self):
        """True if any file handle is being held

        Used for assert and debug in the python code"""
        return self._segmentfile.is_open or self._segmentfile_sidedata.is_open

    @contextlib.contextmanager
    def writing(self, transaction, data_end=None, sidedata_end=None):
        """Open the revlog files for writing

        Adding content to a revlog should be done within such a context.
        """
        if self.is_writing:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self.index)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self.inline:
                    try:
                        dfh = self.opener(self.data_file, mode=b"r+")
                        if data_end is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(data_end, os.SEEK_SET)
                    except FileNotFoundError:
                        dfh = self.opener(self.data_file, mode=b"w+")
                    transaction.add(self.data_file, dsize)
                if self.sidedata_file is not None:
                    assert sidedata_end is not None
                    # revlog-v2 does not inline, help Pytype
                    assert dfh is not None
                    try:
                        sdfh = self.opener(self.sidedata_file, mode=b"r+")
                        dfh.seek(sidedata_end, os.SEEK_SET)
                    except FileNotFoundError:
                        sdfh = self.opener(self.sidedata_file, mode=b"w+")
                        transaction.add(self.sidedata_file, sidedata_end)

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self.inline:
                    transaction.add(self.index_file, dsize + isize)
                else:
                    transaction.add(self.index_file, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                self._segmentfile.writing_handle = ifh if self.inline else dfh
                self._segmentfile_sidedata.writing_handle = sdfh
                yield
            finally:
                self._writinghandles = None
                self._segmentfile.writing_handle = None
                self._segmentfile_sidedata.writing_handle = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()
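    # Illustrative usage sketch (``tr`` is assumed to be an open transaction
    # object): revisions are added while the writing context holds the file
    # handles, typically through ``write_entry`` defined below:
    #
    #   with inner.writing(tr):
    #       ... compute entry, data, sidedata ...
    #       inner.write_entry(tr, entry, data, link, offset, sidedata,
    #                         sidedata_offset, index_end, data_end,
    #                         sidedata_end)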

    def __index_write_fp(self, index_end=None):
        """internal method to open the index file for writing

        You should not use this directly; use the ``writing`` context
        manager instead.
        """
        try:
            f = self.opener(
                self.index_file,
                mode=b"r+",
                checkambig=self.data_config.check_ambig,
            )
            if index_end is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(index_end, os.SEEK_SET)
            return f
        except FileNotFoundError:
            return self.opener(
                self.index_file,
                mode=b"w+",
                checkambig=self.data_config.check_ambig,
            )

    def __index_new_fp(self):
        """internal method to create a new index file for writing

        You should not use this unless you are upgrading from an inline revlog
        """
        return self.opener(
            self.index_file,
            mode=b"w",
            checkambig=self.data_config.check_ambig,
            atomictemp=True,
        )

    def split_inline(self, tr, header, new_index_file_path=None):
        """split the data of an inline revlog into an index and a data file"""
        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None
            self._segmentfile.writing_handle = None
            # No need to deal with sidedata writing handle as it is only
            # relevant with revlog-v2 which is never inline, not reaching
            # this code

        new_dfh = self.opener(self.data_file, mode=b"w+")
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self.reading():
                for r in range(len(self.index)):
                    new_dfh.write(self.get_segment_for_revs(r, r)[1])
                new_dfh.flush()

            if new_index_file_path is not None:
                self.index_file = new_index_file_path
            with self.__index_new_fp() as fp:
                self.inline = False
                for i in range(len(self.index)):
                    e = self.index.entry_binary(i)
                    if i == 0:
                        packed_header = self.index.pack_header(header)
                        e = packed_header + e
                    fp.write(e)

                # If we don't use side-write, the temp file replaces the
                # real index when we exit the context manager

            self._segmentfile = randomaccessfile.randomaccessfile(
                self.opener,
                self.data_file,
                self.data_config.chunk_cache_size,
            )

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                self._segmentfile.writing_handle = new_dfh
                new_dfh = None
                # No need to deal with sidedata writing handle as it is only
                # relevant with revlog-v2 which is never inline, not reaching
                # this code
        finally:
            if new_dfh is not None:
                new_dfh.close()
        return self.index_file

    def get_segment_for_revs(self, startrev, endrev):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions. Requests for data may be
        satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.

        API: we should consider making this a private part of the InnerRevlog
        at some point.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self.inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._segmentfile.read_chunk(start, length)
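    # Worked example of the inline adjustment above (numbers assumed for
    # illustration): in an inline revlog with index.entry_size == 64, a
    # revision whose data nominally starts at offset 100 really lives at
    # 100 + (rev + 1) * 64 bytes into the index file, because the rev + 1
    # fixed-size index entries for revisions 0..rev all precede that
    # revision's data in the file.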

    def _chunk(self, rev):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision. Returns a str holding uncompressed data
        for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self.get_segment_for_revs(rev, rev)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self.inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self.data_config.with_sparse_read:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self,
                revs,
                targetsize=targetsize,
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self.get_segment_for_revs(firstrev, lastrev)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l
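    # Illustrative note on the sparse-read path above (behavior summarized
    # from ``deltautil.slicechunk``; the numbers are assumptions): a request
    # for revs [0..30] may be sliced into e.g. [0..3], [10..15], [28..30] so
    # that each segment read from disk keeps its useful-byte density above
    # sr_density_threshold instead of fetching one large span that is mostly
    # irrelevant bytes.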

    def raw_text(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            cachedrev = self._revisioncache[1]

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._inner._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def sidedata(self, rev, sidedata_end):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self.inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        if sidedata_end < sidedata_offset + sidedata_size:
            filename = self.sidedata_file
            end = sidedata_end
            offset = sidedata_offset
            length = sidedata_size
            m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
            raise error.RevlogError(m)

        comp_segment = self._segmentfile_sidedata.read_chunk(
            sidedata_offset, sidedata_size
        )

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def write_entry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
        index_end,
        data_end,
        sidedata_end,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if index_end is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(index_end, os.SEEK_SET)
        if dfh:
            if data_end is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(sidedata_end, os.SEEK_SET)

        curr = len(self.index) - 1
        if not self.inline:
            transaction.add(self.data_file, offset)
            if self.sidedata_file:
                transaction.add(self.sidedata_file, sidedata_offset)
            transaction.add(self.index_file, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self.index_file, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
        return (
            ifh.tell(),
            dfh.tell() if dfh else None,
            sdfh.tell() if sdfh else None,
        )


class revlog:
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).

    See mercurial/revlogutils/constants.py for details about the content of an
    index entry.
    """

    _flagserrorclass = error.RevlogError

    @staticmethod
    def is_inline_index(header_bytes):
        """Determine if a revlog is inline from the initial bytes of the index"""
        header = INDEX_HEADER.unpack(header_bytes)[0]

        _format_flags = header & ~0xFFFF
        _format_version = header & 0xFFFF

        features = FEATURES_BY_VERSION[_format_version]
        return features[b'inline'](_format_flags)
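    # Worked example of the bit-masking above (the header value is assumed
    # for illustration): the index header keeps format flags in the high 16
    # bits and the version in the low 16 bits. For header == 0x00010001,
    # _format_flags == 0x00010000 and _format_version == 1; the per-version
    # ``features`` table then decides whether that flag combination means an
    # inline revlog.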

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
        try_split=False,
        canonical_parent_order=True,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self._try_split = try_split
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        if b'feature-config' in self.opener.options:
            self.feature_config = self.opener.options[b'feature-config'].copy()
        else:
            self.feature_config = FeatureConfig()
        self.feature_config.censorable = censorable
        self.feature_config.canonical_parent_order = canonical_parent_order
        if b'data-config' in self.opener.options:
            self.data_config = self.opener.options[b'data-config'].copy()
        else:
            self.data_config = DataConfig()
        self.data_config.check_ambig = checkambig
        self.data_config.mmap_large_index = mmaplargeindex
        if b'delta-config' in self.opener.options:
            self.delta_config = self.opener.options[b'delta-config'].copy()
        else:
            self.delta_config = DeltaConfig()
        self.delta_config.upper_bound_comp = upperboundcomp

        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)

        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}

        # other optional features

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)
        # prevent nesting of addgroup
        self._adding_group = None

        chunk_cache = self._loadindex()
        self._load_inner(chunk_cache)
        self._concurrencychecker = concurrencychecker

    @property
    def _generaldelta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.general_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.general_delta

    @property
    def _checkambig(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.checkambig", b"6.6", stacklevel=2
        )
        return self.data_config.check_ambig

    @property
    def _mmaplargeindex(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.mmap_large_index", b"6.6", stacklevel=2
        )
        return self.data_config.mmap_large_index

    @property
    def _censorable(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.censorable", b"6.6", stacklevel=2
        )
        return self.feature_config.censorable

    @property
    def _chunkcachesize(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.chunk_cache_size", b"6.6", stacklevel=2
        )
        return self.data_config.chunk_cache_size

    @property
    def _maxchainlen(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
        )
        return self.delta_config.max_chain_len

    @property
    def _deltabothparents(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.delta_both_parents", b"6.6", stacklevel=2
        )
        return self.delta_config.delta_both_parents

    @property
    def _candidate_group_chunk_size(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.candidate_group_chunk_size",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.candidate_group_chunk_size

    @property
    def _debug_delta(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.debug_delta", b"6.6", stacklevel=2
        )
        return self.delta_config.debug_delta

    @property
    def _compengine(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine

    @property
    def upperboundcomp(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.upper_bound_comp",
            b"6.6",
            stacklevel=2,
        )
        return self.delta_config.upper_bound_comp

    @property
    def _compengineopts(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.feature_config.compression_engine_options",
            b"6.6",
            stacklevel=2,
        )
        return self.feature_config.compression_engine_options

    @property
    def _maxdeltachainspan(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.max_deltachain_span", b"6.6", stacklevel=2
        )
        return self.delta_config.max_deltachain_span

    @property
    def _withsparseread(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.data_config.with_sparse_read", b"6.6", stacklevel=2
        )
        return self.data_config.with_sparse_read

    @property
    def _sparserevlog(self):
        """temporary compatibility proxy"""
        util.nouideprecwarn(
            b"use revlog.delta_config.sparse_revlog", b"6.6", stacklevel=2
        )
        return self.delta_config.sparse_revlog
1323
1330
1324 @property
1331 @property
1325 def hassidedata(self):
1332 def hassidedata(self):
1326 """temporary compatibility proxy"""
1333 """temporary compatibility proxy"""
1327 util.nouideprecwarn(
1334 util.nouideprecwarn(
1328 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1335 b"use revlog.feature_config.has_side_data", b"6.6", stacklevel=2
1329 )
1336 )
1330 return self.feature_config.has_side_data
1337 return self.feature_config.has_side_data
1331
1338
1332 @property
1339 @property
1333 def _srdensitythreshold(self):
1340 def _srdensitythreshold(self):
1334 """temporary compatibility proxy"""
1341 """temporary compatibility proxy"""
1335 util.nouideprecwarn(
1342 util.nouideprecwarn(
1336 b"use revlog.data_config.sr_density_threshold",
1343 b"use revlog.data_config.sr_density_threshold",
1337 b"6.6",
1344 b"6.6",
1338 stacklevel=2,
1345 stacklevel=2,
1339 )
1346 )
1340 return self.data_config.sr_density_threshold
1347 return self.data_config.sr_density_threshold
1341
1348
1342 @property
1349 @property
1343 def _srmingapsize(self):
1350 def _srmingapsize(self):
1344 """temporary compatibility proxy"""
1351 """temporary compatibility proxy"""
1345 util.nouideprecwarn(
1352 util.nouideprecwarn(
1346 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1353 b"use revlog.data_config.sr_min_gap_size", b"6.6", stacklevel=2
1347 )
1354 )
1348 return self.data_config.sr_min_gap_size
1355 return self.data_config.sr_min_gap_size
1349
1356
1350 @property
1357 @property
1351 def _compute_rank(self):
1358 def _compute_rank(self):
1352 """temporary compatibility proxy"""
1359 """temporary compatibility proxy"""
1353 util.nouideprecwarn(
1360 util.nouideprecwarn(
1354 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1361 b"use revlog.feature_config.compute_rank", b"6.6", stacklevel=2
1355 )
1362 )
1356 return self.feature_config.compute_rank
1363 return self.feature_config.compute_rank
1357
1364
1358 @property
1365 @property
1359 def canonical_parent_order(self):
1366 def canonical_parent_order(self):
1360 """temporary compatibility proxy"""
1367 """temporary compatibility proxy"""
1361 util.nouideprecwarn(
1368 util.nouideprecwarn(
1362 b"use revlog.feature_config.canonical_parent_order",
1369 b"use revlog.feature_config.canonical_parent_order",
1363 b"6.6",
1370 b"6.6",
1364 stacklevel=2,
1371 stacklevel=2,
1365 )
1372 )
1366 return self.feature_config.canonical_parent_order
1373 return self.feature_config.canonical_parent_order
1367
1374
1368 @property
1375 @property
1369 def _lazydelta(self):
1376 def _lazydelta(self):
1370 """temporary compatibility proxy"""
1377 """temporary compatibility proxy"""
1371 util.nouideprecwarn(
1378 util.nouideprecwarn(
1372 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1379 b"use revlog.delta_config.lazy_delta", b"6.6", stacklevel=2
1373 )
1380 )
1374 return self.delta_config.lazy_delta
1381 return self.delta_config.lazy_delta
1375
1382
1376 @property
1383 @property
1377 def _lazydeltabase(self):
1384 def _lazydeltabase(self):
1378 """temporary compatibility proxy"""
1385 """temporary compatibility proxy"""
1379 util.nouideprecwarn(
1386 util.nouideprecwarn(
1380 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1387 b"use revlog.delta_config.lazy_delta_base", b"6.6", stacklevel=2
1381 )
1388 )
1382 return self.delta_config.lazy_delta_base
1389 return self.delta_config.lazy_delta_base
1383
1390
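    # Editor's note (illustrative sketch, not part of the original code):
    # every property above forwards a pre-6.6 attribute to its new home on
    # one of the config objects while emitting a deprecation warning.
    # Reading `rl._maxchainlen` on a hypothetical revlog instance `rl` is
    # therefore roughly equivalent to:
    #
    #     util.nouideprecwarn(
    #         b"use revlog.delta_config.max_chain_len", b"6.6", stacklevel=2
    #     )
    #     value = rl.delta_config.max_chain_len
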
    def _init_opts(self):
        """process options (from above/config) to set up the associated
        default revlog mode

        These values might be affected when actually reading on-disk
        information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
            compute_rank = opts.get(b'changelogv2.compute-rank', True)
            self.feature_config.compute_rank = compute_rank
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        mmapindexthreshold = None
        if self.data_config.mmap_large_index:
            mmapindexthreshold = self.data_config.mmap_index_threshold
        if self.feature_config.enable_ellipsis:
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).items():
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        chunk_cache_size = self.data_config.chunk_cache_size
        if chunk_cache_size <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % chunk_cache_size
            )
        elif chunk_cache_size & (chunk_cache_size - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % chunk_cache_size
            )
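        # Editor's note (illustrative aside): the check above uses the usual
        # bit trick: for n > 0, `n & (n - 1) == 0` exactly when n is a power
        # of two, since subtracting one flips the lowest set bit and all the
        # bits below it. For example, 65536 & 65535 == 0, while
        # 65537 & 65536 != 0.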
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file's content, with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: we should .close() to release resources
                        # without relying on the Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except FileNotFoundError:
            return b''

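    # Editor's note (illustrative sketch, not part of the original code):
    # `_get_data` only mmaps a file once its size crosses the given
    # threshold; smaller files are read outright. A hypothetical call for
    # an index file could look like:
    #
    #     data = self._get_data(b'data/foo.txt.i', mmap_threshold=65536)
    #
    # which returns either a `util.buffer` over an mmap or plain bytes,
    # and b'' when the file does not exist.
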
    def get_streams(self, max_linkrev, force_inline=False):
        """return a list of streams that represent this revlog

        This is used by stream-clone to do byte-to-byte copies of a
        repository.

        This streams data for all revisions that refer to a changelog
        revision up to `max_linkrev`.

        If `force_inline` is set, it enforces that the stream will represent
        an inline revlog.

        It returns a list of three-tuples:

            [
                (filename, bytes_stream, stream_size),
                …
            ]
        """
        n = len(self)
        index = self.index
        while n > 0:
            linkrev = index[n - 1][4]
            if linkrev < max_linkrev:
                break
            # note: this loop will rarely go through multiple iterations,
            # since it only traverses commits created during the current
            # streaming pull operation.
            #
            # If this becomes a problem, using a binary search should cap
            # the runtime of this.
            n = n - 1
        if n == 0:
            # no data to send
            return []
        index_size = n * index.entry_size
        data_size = self.end(n - 1)

        # XXX we might have been split (or stripped) since the object
        # initialization. We need to close this race too, e.g. by having a
        # way to pre-open the files we feed to the revlog and never closing
        # them before we are done streaming.

        if self._inline:

            def get_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    size = index_size + data_size
                    if size <= 65536:
                        yield fp.read(size)
                    else:
                        yield from util.filechunkiter(fp, limit=size)

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        elif force_inline:

            def get_stream():
                with self.reading():
                    yield None

                    for rev in range(n):
                        idx = self.index.entry_binary(rev)
                        if rev == 0 and self._docket is None:
                            # re-inject the inline flag
                            header = self._format_flags
                            header |= self._format_version
                            header |= FLAG_INLINE_DATA
                            header = self.index.pack_header(header)
                            idx = header + idx
                        yield idx
                        yield self._inner.get_segment_for_revs(rev, rev)[1]

            inline_stream = get_stream()
            next(inline_stream)
            return [
                (self._indexfile, inline_stream, index_size + data_size),
            ]
        else:

            def get_index_stream():
                with self.opener(self._indexfile, mode=b"r") as fp:
                    yield None
                    if index_size <= 65536:
                        yield fp.read(index_size)
                    else:
                        yield from util.filechunkiter(fp, limit=index_size)

            def get_data_stream():
                with self._datafp() as fp:
                    yield None
                    if data_size <= 65536:
                        yield fp.read(data_size)
                    else:
                        yield from util.filechunkiter(fp, limit=data_size)

            index_stream = get_index_stream()
            next(index_stream)
            data_stream = get_data_stream()
            next(data_stream)
            return [
                (self._datafile, data_stream, data_size),
                (self._indexfile, index_stream, index_size),
            ]

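    # Editor's note (illustrative sketch, not part of the original code):
    # a stream-clone producer could consume the result of `get_streams`
    # along these lines (`send_file_header` and `send_bytes` are
    # hypothetical helpers):
    #
    #     for name, stream, size in rl.get_streams(max_linkrev):
    #         send_file_header(name, size)
    #         for chunk in stream:
    #             send_bytes(chunk)
    #
    # The generators are primed before being returned (their initial
    # `yield None` is already consumed), so the files are held open and
    # iteration yields file data directly.
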
    def _loadindex(self, docket=None):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        elif self._try_split and self.opener.exists(self._split_index_file):
            entry_point = self._split_index_file
        else:
            entry_point = b'%s.i' % self.radix

        if docket is not None:
            self._docket = docket
            self._docket_file = entry_point
        else:
            self._initempty = True
            entry_data = self._get_data(entry_point, mmapindexthreshold)
            if len(entry_data) > 0:
                header = INDEX_HEADER.unpack(entry_data[:4])[0]
                self._initempty = False
            else:
                header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
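        # Editor's note (illustrative aside): the 32-bit header packs a
        # 16-bit version into the low bits and format flags into the high
        # bits. For a header built as REVLOGV1 | FLAG_INLINE_DATA in
        # _init_opts above, `header & 0xFFFF` recovers REVLOGV1 and
        # `header & ~0xFFFF` recovers FLAG_INLINE_DATA.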

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self.delta_config.general_delta = features[b'generaldelta'](
            self._format_flags
        )
        self.feature_config.has_side_data = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )

        if self._docket is not None:
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'not enough index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self.delta_config.general_delta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self.delta_config.general_delta:
            self.delta_config.sparse_revlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and hasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index = index
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)

        return chunkcache

    def _load_inner(self, chunk_cache):
        if self._docket is None:
            default_compression_header = None
        else:
            default_compression_header = self._docket.default_compression_header

        self._inner = _InnerRevlog(
            opener=self.opener,
            index=self.index,
            index_file=self._indexfile,
            data_file=self._datafile,
            sidedata_file=self._sidedatafile,
            inline=self._inline,
            data_config=self.data_config,
            delta_config=self.delta_config,
            feature_config=self.feature_config,
            chunk_cache=chunk_cache,
            default_compression_header=default_compression_header,
        )

    def get_revlog(self):
        """simple function to mirror the API of other not-really-revlog
        classes"""
        return self

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        if self.revlog_kind == KIND_FILELOG:
            # Reference the file without the "data/" prefix, so it is
            # familiar to the user.
            return self.target[1]
        else:
            return self.radix

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def _candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        """update the on-disk cache

        If a transaction is passed, the update may be delayed to transaction
        commit."""
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        """Clear in-memory caches"""
        self._chainbasecache.clear()
        self._inner.clear_cache()
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and hasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        """return the revision number associated with a <nodeid>"""
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
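    # Editor's note (illustrative aside): with the 6-byte offset and 2-byte
    # flags packed into a single integer, an entry with offset 0x1234 and
    # flags 0x0001 stores (0x1234 << 16) | 0x0001; `start()` above shifts
    # the flags back out, and `flags()` below masks them off with 0xFFFF.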

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.feature_config.has_side_data:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev))

    def fast_rank(self, rev):
        """Return the rank of a revision if already known, or None otherwise.

        The rank of a revision is the size of the sub-graph it defines as a
        head. Equivalently, the rank of a revision `r` is the size of the set
        `ancestors(r)`, `r` included.

        This method returns the rank retrieved from the revlog in constant
        time. It makes no attempt at computing unknown values for versions of
        the revlog which do not persist the rank.
        """
        rank = self.index[rev][ENTRY_RANK]
        if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
            return None
        if rev == nullrev:
            return 0  # convention
        return rank

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

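    # Editor's note (illustrative aside): `chainbase` follows index field 3
    # (the delta base) until it reaches a revision that is its own base,
    # i.e. one stored as a full snapshot rather than a delta. With a chain
    # where base(5) == 3, base(3) == 1 and base(1) == 1, chainbase(5)
    # visits 5, 3, 1 and returns 1, caching the result.
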
    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        if self.feature_config.canonical_parent_order and entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

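    # Editor's note (illustrative aside): with canonical_parent_order
    # enabled, a revision recorded with parents (nullrev, p2) is reported
    # by `parentrevs` as (p2, nullrev), so a null first parent never hides
    # a real parent in the p2 slot.
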
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        if self.feature_config.canonical_parent_order and d[5] == self.nullid:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self.delta_config.general_delta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        return self._inner._deltachain(rev, stoprev=stoprev)

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

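    # Editor's note (illustrative sketch, not part of the original code):
    # the object returned by `ancestors` is lazy: iteration (and membership
    # tests) only walk as much of the graph as needed. For a hypothetical
    # revlog `rl` and revision `tip_rev`:
    #
    #     for rev in rl.ancestors([tip_rev], stoprev=0, inclusive=True):
    #         ...  # revisions in reverse revision order, tip_rev included
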
    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset:
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

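    # Editor's note (illustrative aside): in revset terms the pair returned
    # above is (::common, (::heads) - (::common)). On a linear history
    # 0..4 with common=[node(2)] and heads=[node(4)], `has` covers
    # {nullrev, 0, 1, 2} and the missing list holds the nodes of revisions
    # [3, 4], in revision order.
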
2101 def incrementalmissingrevs(self, common=None):
2108 def incrementalmissingrevs(self, common=None):
2102 """Return an object that can be used to incrementally compute the
2109 """Return an object that can be used to incrementally compute the
2103 revision numbers of the ancestors of arbitrary sets that are not
2110 revision numbers of the ancestors of arbitrary sets that are not
2104 ancestors of common. This is an ancestor.incrementalmissingancestors
2111 ancestors of common. This is an ancestor.incrementalmissingancestors
2105 object.
2112 object.
2106
2113
2107 'common' is a list of revision numbers. If common is not supplied, uses
2114 'common' is a list of revision numbers. If common is not supplied, uses
2108 nullrev.
2115 nullrev.
2109 """
2116 """
2110 if common is None:
2117 if common is None:
2111 common = [nullrev]
2118 common = [nullrev]
2112
2119
2113 if rustancestor is not None and self.index.rust_ext_compat:
2120 if rustancestor is not None and self.index.rust_ext_compat:
2114 return rustancestor.MissingAncestors(self.index, common)
2121 return rustancestor.MissingAncestors(self.index, common)
2115 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2122 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2116
2123
2117 def findmissingrevs(self, common=None, heads=None):
2124 def findmissingrevs(self, common=None, heads=None):
2118 """Return the revision numbers of the ancestors of heads that
2125 """Return the revision numbers of the ancestors of heads that
2119 are not ancestors of common.
2126 are not ancestors of common.
2120
2127
2121 More specifically, return a list of revision numbers corresponding to
2128 More specifically, return a list of revision numbers corresponding to
2122 nodes N such that every N satisfies the following constraints:
2129 nodes N such that every N satisfies the following constraints:
2123
2130
2124 1. N is an ancestor of some node in 'heads'
2131 1. N is an ancestor of some node in 'heads'
2125 2. N is not an ancestor of any node in 'common'
2132 2. N is not an ancestor of any node in 'common'
2126
2133
2127 The list is sorted by revision number, meaning it is
2134 The list is sorted by revision number, meaning it is
2128 topologically sorted.
2135 topologically sorted.
2129
2136
2130 'heads' and 'common' are both lists of revision numbers. If heads is
2137 'heads' and 'common' are both lists of revision numbers. If heads is
2131 not supplied, uses all of the revlog's heads. If common is not
2138 not supplied, uses all of the revlog's heads. If common is not
2132 supplied, uses nullid."""
2139 supplied, uses nullid."""
2133 if common is None:
2140 if common is None:
2134 common = [nullrev]
2141 common = [nullrev]
2135 if heads is None:
2142 if heads is None:
2136 heads = self.headrevs()
2143 heads = self.headrevs()
2137
2144
2138 inc = self.incrementalmissingrevs(common=common)
2145 inc = self.incrementalmissingrevs(common=common)
2139 return inc.missingancestors(heads)
2146 return inc.missingancestors(heads)
2140
2147
2141 def findmissing(self, common=None, heads=None):
2148 def findmissing(self, common=None, heads=None):
2142 """Return the ancestors of heads that are not ancestors of common.
2149 """Return the ancestors of heads that are not ancestors of common.
2143
2150
2144 More specifically, return a list of nodes N such that every N
2151 More specifically, return a list of nodes N such that every N
2145 satisfies the following constraints:
2152 satisfies the following constraints:
2146
2153
2147 1. N is an ancestor of some node in 'heads'
2154 1. N is an ancestor of some node in 'heads'
2148 2. N is not an ancestor of any node in 'common'
2155 2. N is not an ancestor of any node in 'common'
2149
2156
2150 The list is sorted by revision number, meaning it is
2157 The list is sorted by revision number, meaning it is
2151 topologically sorted.
2158 topologically sorted.
2152
2159
2153 'heads' and 'common' are both lists of node IDs. If heads is
2160 'heads' and 'common' are both lists of node IDs. If heads is
2154 not supplied, uses all of the revlog's heads. If common is not
2161 not supplied, uses all of the revlog's heads. If common is not
2155 supplied, uses nullid."""
2162 supplied, uses nullid."""
2156 if common is None:
2163 if common is None:
2157 common = [self.nullid]
2164 common = [self.nullid]
2158 if heads is None:
2165 if heads is None:
2159 heads = self.heads()
2166 heads = self.heads()
2160
2167
2161 common = [self.rev(n) for n in common]
2168 common = [self.rev(n) for n in common]
2162 heads = [self.rev(n) for n in heads]
2169 heads = [self.rev(n) for n in heads]
2163
2170
2164 inc = self.incrementalmissingrevs(common=common)
2171 inc = self.incrementalmissingrevs(common=common)
2165 return [self.node(r) for r in inc.missingancestors(heads)]
2172 return [self.node(r) for r in inc.missingancestors(heads)]
2166
2173
    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.items() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

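    # Editor's note: an illustrative sketch only; names are hypothetical.
    # For a linear history n0 -> n1 -> n2, nodesbetween() returns the
    # topological path plus the subsets of the inputs that were actually
    # reachable:
    #
    #     nodes, outroots, outheads = rl.nodesbetween([n0], [n2])
    #     # nodes == [n0, n1, n2]; outroots == [n0]; outheads == [n2]
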
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

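    # Editor's note: a hedged sketch of the pure-Python fallback above, not
    # original text. With three revs where rev 2 merges revs 0 and 1, every
    # rev is first flagged as a potential head; visiting rev 2 then clears
    # its parents' flags (index entry slots 5 and 6), so only the childless
    # rev 2 survives. The extra trailing slot of `ishead` absorbs nullrev
    # (-1) parents without special-casing them.
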
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

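    # Editor's note: illustrative only; node names are hypothetical. Without
    # arguments this is equivalent to mapping headrevs() back to node ids;
    # `start` restricts the result to heads descending from one node, and
    # `stop` makes the walk treat the given nodes as childless:
    #
    #     all_heads = rl.heads()
    #     branch_heads = rl.heads(start=n0, stop=[n_cutoff])
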
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

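    # Editor's note: a hedged sketch, not original text. The early exits
    # above lean on two revlog invariants: nullrev is an ancestor of
    # everything, and a revision number is always greater than those of its
    # ancestors, so `a > b` can never mean "a is an ancestor of b":
    #
    #     rl.isancestorrev(nullrev, r)  # True for any r
    #     rl.isancestorrev(r, r)        # True, by convention
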
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

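    # Editor's note: illustrative sketch. When two nodes have several equally
    # good common ancestors, the tie-break above simply picks the smallest
    # binary node id, which keeps the answer deterministic across runs:
    #
    #     base = rl.ancestor(n1, n2)
    #     # base == rl.nullid when n1 and n2 share no real ancestor
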
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (binascii.Error, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key is not hex
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            # hex(node)[:...]
            l = len(id) // 2 * 2  # grab an even number of digits
            try:
                # we're dropping the last digit, so let's check that it's hex,
                # to avoid the expensive computation below if it's not
                if len(id) % 2 > 0:
                    if not (id[-1] in hexdigits):
                        return None
                prefix = bin(id[:l])
            except binascii.Error:
                pass
            else:
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

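    # Editor's note: illustrative only; the concrete values are hypothetical.
    # lookup() accepts all the identifier shapes the helpers above handle:
    #
    #     rl.lookup(0)            # revision number
    #     rl.lookup(b'0')         # str(revision number), as bytes
    #     rl.lookup(node)         # full binary node id
    #     rl.lookup(b'1f0c3a')    # unambiguous hex prefix
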
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

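    # Editor's note: a hedged sketch, not original text. shortest() is the
    # kind of primitive that backs short-hash display helpers: it grows the
    # candidate prefix until _partialmatch() stops reporting ambiguity, then
    # lengthens it once more if needed so it cannot be mistaken for the
    # all-'f' wdir id:
    #
    #     prefix = rl.shortest(node, minlength=4)
    #     assert rl.lookup(prefix) == node
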
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self.delta_config.general_delta:
            return base
        else:
            return rev - 1

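    # Editor's note: a hedged sketch. Index slot 3 stores the delta base; a
    # self-referencing base marks a full snapshot. Under general-delta the
    # stored base is the actual parent of the delta, while the older format
    # always deltas against the immediately preceding revision:
    #
    #     if rl.deltaparent(rev) == nullrev:
    #         pass  # rev is stored as a full text, not as a delta
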
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        ret = self._inner.issnapshot(rev)
        self.issnapshot = self._inner.issnapshot
        return ret

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._inner._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._inner._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def revision(self, nodeorrev):
        """return an uncompressed revision of a given node or revision
        number.
        """
        return self._revisiondata(nodeorrev)

    def sidedata(self, nodeorrev):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _rawtext(self, node, rev):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """
        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._inner._revisioncache:
            if self._inner._revisioncache[0] == node:
                return (rev, self._inner._revisioncache[2], True)

        if rev is None:
            rev = self.rev(node)

        return self._inner.raw_text(node, rev)

    def _revisiondata(self, nodeorrev, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev)

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._inner._revisioncache = (node, rev, rawtext)

        return text

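    # Editor's note: a hedged summary of the flow above, not original text.
    # rawdata() and revision() both funnel through _revisiondata(); the only
    # difference is whether flag processors are applied on the way out:
    #
    #     stored = rl.rawdata(rev)   # bytes exactly as kept in the revlog
    #     clean = rl.revision(rev)   # bytes after flag processing
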
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        sidedata_end = None
        if self._docket is not None:
            sidedata_end = self._docket.sidedata_end
        return self._inner.sidedata(rev, sidedata_end)

    def rawdata(self, nodeorrev):
        """return the uncompressed raw data of a given node or revision number."""
        return self._revisiondata(nodeorrev, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if (
                    self._inner._revisioncache
                    and self._inner._revisioncache[0] == node
                ):
                    self._inner._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self.feature_config.censorable and storageutil.iscensoredtext(
                text
            ):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    @property
    def _split_index_file(self):
        """the path where to expect the index of an ongoing splitting operation

        The file will only exist if a splitting operation is in progress, but
        it is always expected at the same location."""
        parts = self.radix.split(b'/')
        if len(parts) > 1:
            # adds a '-s' suffix to the ``data/`` or ``meta/`` base directory
            head = parts[0] + b'-s'
            mids = parts[1:-1]
            tail = parts[-1] + b'.i'
            pieces = [head] + mids + [tail]
            return b'/'.join(pieces)
        else:
            # the revlog is stored at the root of the store (changelog or
            # manifest), no risk of collision.
            return self.radix + b'.i.s'

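    # Editor's note: a worked example of the path rewriting above, using a
    # hypothetical filelog radix:
    #
    #     radix b'data/foo/bar'  ->  split index b'data-s/foo/bar.i'
    #     radix b'00changelog'   ->  split index b'00changelog.i.s'
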
    def _enforceinlinesize(self, tr, side_write=True):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        if self._docket is not None:
            msg = b"inline revlog should not have a docket"
            raise error.ProgrammingError(msg)

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        if troffset:
            tr.addbackup(self._indexfile, for_offset=True)
        tr.add(self._datafile, 0)

        new_index_file_path = None
        if side_write:
            old_index_file_path = self._indexfile
            new_index_file_path = self._split_index_file
            opener = self.opener
            weak_self = weakref.ref(self)

            # the "split" index replaces the real index when the transaction
            # is finalized
            def finalize_callback(tr):
                opener.rename(
                    new_index_file_path,
                    old_index_file_path,
                    checkambig=True,
                )
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.index_file = maybe_self._indexfile

            def abort_callback(tr):
                maybe_self = weak_self()
                if maybe_self is not None:
                    maybe_self._indexfile = old_index_file_path
                    maybe_self._inner.inline = True
                    maybe_self._inner.index_file = old_index_file_path

            tr.registertmp(new_index_file_path)
            if self.target[1] is not None:
                callback_id = b'000-revlog-split-%d-%s' % self.target
            else:
                callback_id = b'000-revlog-split-%d' % self.target[0]
            tr.addfinalize(callback_id, finalize_callback)
            tr.addabort(callback_id, abort_callback)

        self._format_flags &= ~FLAG_INLINE_DATA
        self._inner.split_inline(
            tr,
            self._format_flags | self._format_version,
            new_index_file_path=new_index_file_path,
        )

        self._inline = False
        if new_index_file_path is not None:
            self._indexfile = new_index_file_path

        nodemaputil.setup_persistent_nodemap(tr, self)

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def reading(self):
        with self._inner.reading():
            yield

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._inner.is_writing:
            yield
        else:
            data_end = None
            sidedata_end = None
            if self._docket is not None:
                data_end = self._docket.data_end
                sidedata_end = self._docket.sidedata_end
            with self._inner.writing(
                transaction,
                data_end=data_end,
                sidedata_end=sidedata_end,
            ):
                yield
            if self._docket is not None:
                self._write_docket(transaction)

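    # Editor's note: an illustrative sketch of the writing protocol implied
    # above; `tr` is a hypothetical transaction object. Nesting is cheap
    # because an already-writing inner revlog just yields:
    #
    #     with rl._writing(tr):
    #         rl.addrevision(text, tr, linkrev, p1, p2)
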
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all revlogs
        since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in that
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.feature_config.has_side_data:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

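    # Editor's note: an illustrative call, not original text; every argument
    # value here is hypothetical:
    #
    #     rev = rl.addrevision(b'file content', tr, linkrev, p1, p2)
    #     # returns the existing rev unchanged if the node is already stored
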
3004 def addrawrevision(
3011 def addrawrevision(
3005 self,
3012 self,
3006 rawtext,
3013 rawtext,
3007 transaction,
3014 transaction,
3008 link,
3015 link,
3009 p1,
3016 p1,
3010 p2,
3017 p2,
3011 node,
3018 node,
3012 flags,
3019 flags,
3013 cachedelta=None,
3020 cachedelta=None,
3014 deltacomputer=None,
3021 deltacomputer=None,
3015 sidedata=None,
3022 sidedata=None,
3016 ):
3023 ):
3017 """add a raw revision with known flags, node and parents
3024 """add a raw revision with known flags, node and parents
3018 useful when reusing a revision not stored in this revlog (ex: received
3025 useful when reusing a revision not stored in this revlog (ex: received
3019 over wire, or read from an external bundle).
3026 over wire, or read from an external bundle).
3020 """
3027 """
3021 with self._writing(transaction):
3028 with self._writing(transaction):
3022 return self._addrevision(
3029 return self._addrevision(
3023 node,
3030 node,
3024 rawtext,
3031 rawtext,
3025 transaction,
3032 transaction,
3026 link,
3033 link,
3027 p1,
3034 p1,
3028 p2,
3035 p2,
3029 flags,
3036 flags,
3030 cachedelta,
3037 cachedelta,
3031 deltacomputer=deltacomputer,
3038 deltacomputer=deltacomputer,
3032 sidedata=sidedata,
3039 sidedata=sidedata,
3033 )
3040 )
3034
3041
3035 def compress(self, data):
3042 def compress(self, data):
3036 return self._inner.compress(data)
3043 return self._inner.compress(data)
3037
3044
3038 def decompress(self, data):
3045 def decompress(self, data):
3039 return self._inner.decompress(data)
3046 return self._inner.decompress(data)
3040
3047
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._inner._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._inner._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            write_debug = None
            if self.delta_config.debug_delta:
                write_debug = transaction._report
            deltacomputer = deltautil.deltacomputer(
                self, write_debug=write_debug
            )

        if cachedelta is not None and len(cachedelta) == 2:
            # If the cached delta has no information about how it should be
            # reused, add the default reuse instruction according to the
            # revlog's configuration.
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse = DELTA_BASE_REUSE_NO
            cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)

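        # ``finddeltainfo`` (below) picks the storage form for this revision:
        # it weighs candidate delta bases (including the cached delta, when
        # its reuse policy allows) against storing a full text, consistent
        # with the "full versions are inserted ..." comment above. This
        # summary is inferred from the surrounding code, not a specification
        # of the delta computer.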
        revinfo = revlogutils.revisioninfo(
            node,
            p1,
            p2,
            btext,
            textlen,
            cachedelta,
            flags,
        )

        deltainfo = deltacomputer.finddeltainfo(revinfo)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            default_comp = self._docket.default_compression_header
            r = deltautil.delta_compression(default_comp, deltainfo)
            compression_mode, deltainfo = r

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.feature_config.has_side_data:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self._inner.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

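        # From the computation below, ``rank`` appears to be the number of
        # revisions in this revision's closed ancestry, itself included: 1
        # for a root, 1 + rank(parent) for a single-parent revision, and for
        # a merge the size of the union of both parents' ancestor sets
        # (1 + rank(pmax), plus the ancestors of pmin missing from pmax).
        # This is a reading of the code, not an authoritative definition.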
        rank = RANK_UNKNOWN
        if self.feature_config.compute_rank:
            if (p1r, p2r) == (nullrev, nullrev):
                rank = 1
            elif p1r != nullrev and p2r == nullrev:
                rank = 1 + self.fast_rank(p1r)
            elif p1r == nullrev and p2r != nullrev:
                rank = 1 + self.fast_rank(p2r)
            else:  # merge node
                if rustdagop is not None and self.index.rust_ext_compat:
                    rank = rustdagop.rank(self.index, p1r, p2r)
                else:
                    pmin, pmax = sorted((p1r, p2r))
                    rank = 1 + self.fast_rank(pmax)
                    rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))

        e = revlogutils.entry(
            flags=flags,
            data_offset=offset,
            data_compressed_length=deltainfo.deltalen,
            data_uncompressed_length=textlen,
            data_compression_mode=compression_mode,
            data_delta_base=deltainfo.base,
            link_rev=link,
            parent_rev_1=p1r,
            parent_rev_2=p2r,
            node_id=node,
            sidedata_offset=sidedata_offset,
            sidedata_compressed_length=len(serialized_sidedata),
            sidedata_compression_mode=sidedata_compression_mode,
            rank=rank,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._inner._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(
        self,
        transaction,
        entry,
        data,
        link,
        offset,
        sidedata,
        sidedata_offset,
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        index_end = data_end = sidedata_end = None
        if self._docket is not None:
            index_end = self._docket.index_end
            data_end = self._docket.data_end
            sidedata_end = self._docket.sidedata_end

        files_end = self._inner.write_entry(
            transaction,
            entry,
            data,
            link,
            offset,
            sidedata,
            sidedata_offset,
            index_end,
            data_end,
            sidedata_end,
        )
        self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = files_end[0]
            self._docket.data_end = files_end[1]
            self._docket.sidedata_end = files_end[2]

        nodemaputil.setup_persistent_nodemap(transaction, self)

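    # ``addgroup`` below unpacks each element of ``deltas`` as an 8-tuple:
    #
    #     (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # where ``delta`` is a binary patch against ``deltabase`` and
    # ``linknode`` is mapped to a local link revision through ``linkmapper``.
    # The tuple layout is taken from the unpacking in the loop body; the
    # changegroup connection is an inference from the comments there.
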
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
        debug_info=None,
        delta_base_reuse_policy=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        # read the default delta-base reuse policy from revlog config if the
        # group did not specify one.
        if delta_base_reuse_policy is None:
            if (
                self.delta_config.general_delta
                and self.delta_config.lazy_delta_base
            ):
                delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
            else:
                delta_base_reuse_policy = DELTA_BASE_REUSE_NO

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                write_debug = None
                if self.delta_config.debug_delta:
                    write_debug = transaction._report
                deltacomputer = deltautil.deltacomputer(
                    self,
                    write_debug=write_debug,
                    debug_info=debug_info,
                )
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta, delta_base_reuse_policy),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self.feature_config.censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self.feature_config.censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._chaininfocache = util.lrucachedict(500)
        self._inner.clear_cache()

        del self.index[rev:-1]

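    # Note on the truncation arithmetic above: for a non-inline revlog the
    # index and data live in separate files, so the index is cut at
    # ``rev * entry_size`` and the data file at ``self.start(rev)``. For an
    # inline revlog the ``.i`` file interleaves index entries with revision
    # data, hence the cut-off is ``data_end + rev * entry_size``.
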
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except FileNotFoundError:
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except FileNotFoundError:
            di = 0

        return (dd, di)

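    # Reading ``checksize`` results: ``dd`` counts data-file bytes past the
    # end of the last indexed revision, and ``di`` counts index bytes that
    # do not form a whole entry. For example, with an entry size of 64, a
    # 130-byte index would yield di = 130 - 2 * 64 = 2. The concrete entry
    # size is illustrative; it depends on the index format in use.
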
    def files(self):
        """return list of files that compose this revlog"""
        res = [self._indexfile]
        if self._docket_file is None:
            if not self._inline:
                res.append(self._datafile)
        else:
            res.append(self._docket_file)
            res.extend(self._docket.old_index_filepaths(include_empty=False))
            if self._docket.data_end:
                res.append(self._datafile)
            res.extend(self._docket.old_data_filepaths(include_empty=False))
            if self._docket.sidedata_end:
                res.append(self._sidedatafile)
            res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
        debug_info=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self.delta_config.general_delta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self._candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
            debug_info=debug_info,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When unset, the destination revlog's current setting is
        used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        old_delta_config = destrevlog.delta_config
        destrevlog.delta_config = destrevlog.delta_config.copy()

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog.delta_config.lazy_delta_base = True
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog.delta_config.lazy_delta_base = False
                destrevlog.delta_config.lazy_delta = False

            delta_both_parents = (
                forcedeltabothparents or old_delta_config.delta_both_parents
            )
            destrevlog.delta_config.delta_both_parents = delta_both_parents

            with self.reading(), destrevlog._writing(tr):
                self._clone(
                    tr,
                    destrevlog,
                    addrevisioncb,
                    deltareuse,
                    forcedeltabothparents,
                    sidedata_helpers,
                )

        finally:
            destrevlog.delta_config = old_delta_config

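    # Illustrative sketch (hypothetical caller, not part of this module):
    # recomputing every delta while copying into a freshly created revlog
    # would look roughly like
    #
    #     with repo.transaction(b'example-clone') as tr:
    #         src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # where ``repo``, ``src`` and ``dst`` are assumed names, and ``dst``
    # must be empty, as enforced by the check above.
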
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        write_debug = None
        if self.delta_config.debug_delta:
            write_debug = tr._report
        deltacomputer = deltautil.deltacomputer(
            destrevlog,
            write_debug=write_debug,
        )
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog.delta_config.lazy_delta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._inner._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    try:
                        rawtext = self._revisiondata(rev)
                    except error.CensoredNodeError as censored:
                        assert flags & REVIDX_ISCENSORED
                        rawtext = censored.tombstone
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog._addrevision(
                    node,
                    rawtext,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    flags,
                    cachedelta,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

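    # In both ``_clone`` branches above (and in ``rewrite_sidedata`` below),
    # sidedata helpers return a pair of flag bitmasks understood as (flags
    # to add, flags to remove), hence the update
    # ``flags = flags | new_flags[0] & ~new_flags[1]``, where ``&`` binds
    # tighter than ``|``.
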
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
        elif self._format_version == REVLOGV1:
            rewrite.v1_censor(self, tr, censornode, tombstone)
        else:
            rewrite.v2_censor(self, tr, censornode, tombstone)

3825 """Verifies the integrity of the revlog.
3832 """Verifies the integrity of the revlog.
3826
3833
3827 Yields ``revlogproblem`` instances describing problems that are
3834 Yields ``revlogproblem`` instances describing problems that are
3828 found.
3835 found.
3829 """
3836 """
3830 dd, di = self.checksize()
3837 dd, di = self.checksize()
3831 if dd:
3838 if dd:
3832 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3839 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3833 if di:
3840 if di:
3834 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3841 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3835
3842
3836 version = self._format_version
3843 version = self._format_version
3837
3844
3838 # The verifier tells us what version revlog we should be.
3845 # The verifier tells us what version revlog we should be.
3839 if version != state[b'expectedversion']:
3846 if version != state[b'expectedversion']:
3840 yield revlogproblem(
3847 yield revlogproblem(
3841 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3848 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3842 % (self.display_id, version, state[b'expectedversion'])
3849 % (self.display_id, version, state[b'expectedversion'])
3843 )
3850 )
3844
3851
3845 state[b'skipread'] = set()
3852 state[b'skipread'] = set()
3846 state[b'safe_renamed'] = set()
3853 state[b'safe_renamed'] = set()
3847
3854
3848 for rev in self:
3855 for rev in self:
3849 node = self.node(rev)
3856 node = self.node(rev)
3850
3857
3851 # Verify contents. 4 cases to care about:
3858 # Verify contents. 4 cases to care about:
3852 #
3859 #
3853 # common: the most common case
3860 # common: the most common case
3854 # rename: with a rename
3861 # rename: with a rename
3855 # meta: file content starts with b'\1\n', the metadata
3862 # meta: file content starts with b'\1\n', the metadata
3856 # header defined in filelog.py, but without a rename
3863 # header defined in filelog.py, but without a rename
3857 # ext: content stored externally
3864 # ext: content stored externally
3858 #
3865 #
3859 # More formally, their differences are shown below:
3866 # More formally, their differences are shown below:
3860 #
3867 #
3861 # | common | rename | meta | ext
3868 # | common | rename | meta | ext
3862 # -------------------------------------------------------
3869 # -------------------------------------------------------
3863 # flags() | 0 | 0 | 0 | not 0
3870 # flags() | 0 | 0 | 0 | not 0
3864 # renamed() | False | True | False | ?
3871 # renamed() | False | True | False | ?
3865 # rawtext[0:2]=='\1\n'| False | True | True | ?
3872 # rawtext[0:2]=='\1\n'| False | True | True | ?
3866 #
3873 #
3867 # "rawtext" means the raw text stored in revlog data, which
3874 # "rawtext" means the raw text stored in revlog data, which
3868 # could be retrieved by "rawdata(rev)". "text"
3875 # could be retrieved by "rawdata(rev)". "text"
3869 # mentioned below is "revision(rev)".
3876 # mentioned below is "revision(rev)".
3870 #
3877 #
3871 # There are 3 different lengths stored physically:
3878 # There are 3 different lengths stored physically:
3872 # 1. L1: rawsize, stored in revlog index
3879 # 1. L1: rawsize, stored in revlog index
3873 # 2. L2: len(rawtext), stored in revlog data
3880 # 2. L2: len(rawtext), stored in revlog data
3874 # 3. L3: len(text), stored in revlog data if flags==0, or
3881 # 3. L3: len(text), stored in revlog data if flags==0, or
3875 # possibly somewhere else if flags!=0
3882 # possibly somewhere else if flags!=0
3876 #
3883 #
3877 # L1 should be equal to L2. L3 could be different from them.
3884 # L1 should be equal to L2. L3 could be different from them.
3878 # "text" may or may not affect commit hash depending on flag
3885 # "text" may or may not affect commit hash depending on flag
3879 # processors (see flagutil.addflagprocessor).
3886 # processors (see flagutil.addflagprocessor).
3880 #
3887 #
3881 # | common | rename | meta | ext
3888 # | common | rename | meta | ext
3882 # -------------------------------------------------
3889 # -------------------------------------------------
3883 # rawsize() | L1 | L1 | L1 | L1
3890 # rawsize() | L1 | L1 | L1 | L1
3884 # size() | L1 | L2-LM | L1(*) | L1 (?)
3891 # size() | L1 | L2-LM | L1(*) | L1 (?)
3885 # len(rawtext) | L2 | L2 | L2 | L2
3892 # len(rawtext) | L2 | L2 | L2 | L2
3886 # len(text) | L2 | L2 | L2 | L3
3893 # len(text) | L2 | L2 | L2 | L3
3887 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3894 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3888 #
3895 #
3889 # LM: length of metadata, depending on rawtext
3896 # LM: length of metadata, depending on rawtext
3890 # (*): not ideal, see comment in filelog.size
3897 # (*): not ideal, see comment in filelog.size
3891 # (?): could be "- len(meta)" if the resolved content has
3898 # (?): could be "- len(meta)" if the resolved content has
3892 # rename metadata
3899 # rename metadata
3893 #
3900 #
3894 # Checks needed to be done:
3901 # Checks needed to be done:
3895 # 1. length check: L1 == L2, in all cases.
3902 # 1. length check: L1 == L2, in all cases.
3896 # 2. hash check: depending on flag processor, we may need to
3903 # 2. hash check: depending on flag processor, we may need to
3897 # use either "text" (external), or "rawtext" (in revlog).
3904 # use either "text" (external), or "rawtext" (in revlog).
3898
3905
3899 try:
3906 try:
3900 skipflags = state.get(b'skipflags', 0)
3907 skipflags = state.get(b'skipflags', 0)
3901 if skipflags:
3908 if skipflags:
3902 skipflags &= self.flags(rev)
3909 skipflags &= self.flags(rev)
3903
3910
3904 _verify_revision(self, skipflags, state, node)
3911 _verify_revision(self, skipflags, state, node)
3905
3912
3906 l1 = self.rawsize(rev)
3913 l1 = self.rawsize(rev)
3907 l2 = len(self.rawdata(node))
3914 l2 = len(self.rawdata(node))
3908
3915
3909 if l1 != l2:
3916 if l1 != l2:
3910 yield revlogproblem(
3917 yield revlogproblem(
3911 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3918 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3912 node=node,
3919 node=node,
3913 )
3920 )
3914
3921
3915 except error.CensoredNodeError:
3922 except error.CensoredNodeError:
3916 if state[b'erroroncensored']:
3923 if state[b'erroroncensored']:
3917 yield revlogproblem(
3924 yield revlogproblem(
3918 error=_(b'censored file data'), node=node
3925 error=_(b'censored file data'), node=node
3919 )
3926 )
3920 state[b'skipread'].add(node)
3927 state[b'skipread'].add(node)
3921 except Exception as e:
3928 except Exception as e:
3922 yield revlogproblem(
3929 yield revlogproblem(
3923 error=_(b'unpacking %s: %s')
3930 error=_(b'unpacking %s: %s')
3924 % (short(node), stringutil.forcebytestr(e)),
3931 % (short(node), stringutil.forcebytestr(e)),
3925 node=node,
3932 node=node,
3926 )
3933 )
3927 state[b'skipread'].add(node)
3934 state[b'skipread'].add(node)
3928
3935
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

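    # ``rewrite_sidedata`` below regenerates sidedata for revisions
    # ``startrev`` through ``endrev`` inclusive: it runs the sidedata
    # helpers against an empty sidedata mapping, appends the serialized
    # result to the end of the sidedata file, and records the new offsets
    # and flags so the matching index entries can be rewritten afterwards
    # (see the trailing comment). This summary is inferred from the body.
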
3960 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3967 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3961 if not self.feature_config.has_side_data:
3968 if not self.feature_config.has_side_data:
3962 return
3969 return
3963 # revlog formats with sidedata support does not support inline
3970 # revlog formats with sidedata support does not support inline
3964 assert not self._inline
3971 assert not self._inline
3965 if not helpers[1] and not helpers[2]:
3972 if not helpers[1] and not helpers[2]:
3966 # Nothing to generate or remove
3973 # Nothing to generate or remove
3967 return
3974 return
3968
3975
3969 new_entries = []
3976 new_entries = []
3970 # append the new sidedata
3977 # append the new sidedata
3971 with self._writing(transaction):
3978 with self._writing(transaction):
3972 ifh, dfh, sdfh = self._inner._writinghandles
3979 ifh, dfh, sdfh = self._inner._writinghandles
3973 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3980 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3974
3981
3975 current_offset = sdfh.tell()
3982 current_offset = sdfh.tell()
3976 for rev in range(startrev, endrev + 1):
3983 for rev in range(startrev, endrev + 1):
3977 entry = self.index[rev]
3984 entry = self.index[rev]
3978 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3985 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3979 store=self,
3986 store=self,
3980 sidedata_helpers=helpers,
3987 sidedata_helpers=helpers,
3981 sidedata={},
3988 sidedata={},
3982 rev=rev,
3989 rev=rev,
3983 )
3990 )
3984
3991
3985 serialized_sidedata = sidedatautil.serialize_sidedata(
3992 serialized_sidedata = sidedatautil.serialize_sidedata(
3986 new_sidedata
3993 new_sidedata
3987 )
3994 )
3988
3995
3989 sidedata_compression_mode = COMP_MODE_INLINE
3996 sidedata_compression_mode = COMP_MODE_INLINE
3990 if serialized_sidedata and self.feature_config.has_side_data:
3997 if serialized_sidedata and self.feature_config.has_side_data:
3991 sidedata_compression_mode = COMP_MODE_PLAIN
3998 sidedata_compression_mode = COMP_MODE_PLAIN
3992 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3999 h, comp_sidedata = self._inner.compress(serialized_sidedata)
3993 if (
4000 if (
3994 h != b'u'
4001 h != b'u'
3995 and comp_sidedata[0] != b'\0'
4002 and comp_sidedata[0] != b'\0'
3996 and len(comp_sidedata) < len(serialized_sidedata)
4003 and len(comp_sidedata) < len(serialized_sidedata)
3997 ):
4004 ):
3998 assert not h
4005 assert not h
3999 if (
4006 if (
4000 comp_sidedata[0]
4007 comp_sidedata[0]
4001 == self._docket.default_compression_header
4008 == self._docket.default_compression_header
4002 ):
4009 ):
4003 sidedata_compression_mode = COMP_MODE_DEFAULT
4010 sidedata_compression_mode = COMP_MODE_DEFAULT
4004 serialized_sidedata = comp_sidedata
4011 serialized_sidedata = comp_sidedata
4005 else:
4012 else:
4006 sidedata_compression_mode = COMP_MODE_INLINE
4013 sidedata_compression_mode = COMP_MODE_INLINE
4007 serialized_sidedata = comp_sidedata
4014 serialized_sidedata = comp_sidedata
4008 if entry[8] != 0 or entry[9] != 0:
4015 if entry[8] != 0 or entry[9] != 0:
4009 # rewriting entries that already have sidedata is not
4016 # rewriting entries that already have sidedata is not
4010 # supported yet, because it introduces garbage data in the
4017 # supported yet, because it introduces garbage data in the
4011 # revlog.
4018 # revlog.
4012 msg = b"rewriting existing sidedata is not supported yet"
4019 msg = b"rewriting existing sidedata is not supported yet"
4013 raise error.Abort(msg)
4020 raise error.Abort(msg)
4014
4021
4015 # Apply (potential) flags to add and to remove after running
4022 # Apply (potential) flags to add and to remove after running
4016 # the sidedata helpers
4023 # the sidedata helpers
4017 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4024 new_offset_flags = entry[0] | flags[0] & ~flags[1]
4018 entry_update = (
4025 entry_update = (
4019 current_offset,
4026 current_offset,
4020 len(serialized_sidedata),
4027 len(serialized_sidedata),
4021 new_offset_flags,
4028 new_offset_flags,
4022 sidedata_compression_mode,
4029 sidedata_compression_mode,
4023 )
4030 )
4024
4031
4025 # the sidedata computation might have moved the file cursors around
4032 # the sidedata computation might have moved the file cursors around
4026 sdfh.seek(current_offset, os.SEEK_SET)
4033 sdfh.seek(current_offset, os.SEEK_SET)
4027 sdfh.write(serialized_sidedata)
4034 sdfh.write(serialized_sidedata)
4028 new_entries.append(entry_update)
4035 new_entries.append(entry_update)
4029 current_offset += len(serialized_sidedata)
4036 current_offset += len(serialized_sidedata)
4030 self._docket.sidedata_end = sdfh.tell()
4037 self._docket.sidedata_end = sdfh.tell()
4031
4038
4032 # rewrite the new index entries
4039 # rewrite the new index entries
4033 ifh.seek(startrev * self.index.entry_size)
4040 ifh.seek(startrev * self.index.entry_size)
4034 for i, e in enumerate(new_entries):
4041 for i, e in enumerate(new_entries):
4035 rev = startrev + i
4042 rev = startrev + i
4036 self.index.replace_sidedata_info(rev, *e)
4043 self.index.replace_sidedata_info(rev, *e)
4037 packed = self.index.entry_binary(rev)
4044 packed = self.index.entry_binary(rev)
4038 if rev == 0 and self._docket is None:
4045 if rev == 0 and self._docket is None:
4039 header = self._format_flags | self._format_version
4046 header = self._format_flags | self._format_version
4040 header = self.index.pack_header(header)
4047 header = self.index.pack_header(header)
4041 packed = header + packed
4048 packed = header + packed
4042 ifh.write(packed)
4049 ifh.write(packed)
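The compression branch in rewrite_sidedata keeps compressed sidedata only when it actually pays off. Below is a standalone sketch of that same decision; it uses zlib directly as a stand-in for Mercurial's configured compression engine, and the returned labels are only analogies for the COMP_MODE_* constants above, not the real values.

import zlib

# Standalone sketch of the decision above: keep the compressed form only
# when it is strictly smaller; otherwise store the plain serialization.
def choose_sidedata_bytes(serialized):
    comp = zlib.compress(serialized)
    if len(comp) < len(serialized):
        return 'default', comp    # analogous to COMP_MODE_DEFAULT
    return 'plain', serialized    # analogous to COMP_MODE_PLAIN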
@@ -1,155 +1,164 b''
1 # Copyright Mercurial Contributors
1 # Copyright Mercurial Contributors
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 import contextlib
6 import contextlib
7
7
8 from ..i18n import _
8 from ..i18n import _
9 from .. import (
9 from .. import (
10 error,
10 error,
11 util,
11 util,
12 )
12 )
13
13
14
14
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16
16
17 PARTIAL_READ_MSG = _(
17 PARTIAL_READ_MSG = _(
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 )
19 )
20
20
21
21
22 def _is_power_of_two(n):
22 def _is_power_of_two(n):
23 return (n & (n - 1) == 0) and n != 0
23 return (n & (n - 1) == 0) and n != 0
24
24
25
25
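The helper relies on a classic bit trick: n & (n - 1) clears the lowest set bit, so the expression is zero exactly when n has a single bit set, and the n != 0 guard excludes zero itself. A quick check:

# n & (n - 1) clears the lowest set bit; a power of two has one bit set.
for n in (1, 2, 3, 4, 1024, 1048576):
    print(n, (n & (n - 1) == 0) and n != 0)
# 1 True, 2 True, 3 False, 4 True, 1024 True, 1048576 True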
26 class randomaccessfile:
26 class randomaccessfile:
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
27 """Accessing arbitrary chuncks of data within a file, with some caching"""
28
28
29 def __init__(
29 def __init__(
30 self,
30 self,
31 opener,
31 opener,
32 filename,
32 filename,
33 default_cached_chunk_size,
33 default_cached_chunk_size,
34 initial_cache=None,
34 initial_cache=None,
35 ):
35 ):
36 # Required by bitwise manipulation below
36 # Required by bitwise manipulation below
37 assert _is_power_of_two(default_cached_chunk_size)
37 assert _is_power_of_two(default_cached_chunk_size)
38
38
39 self.opener = opener
39 self.opener = opener
40 self.filename = filename
40 self.filename = filename
41 self.default_cached_chunk_size = default_cached_chunk_size
41 self.default_cached_chunk_size = default_cached_chunk_size
42 self.writing_handle = None # This is set from revlog.py
42 self.writing_handle = None # This is set from revlog.py
43 self.reading_handle = None
43 self.reading_handle = None
44 self._cached_chunk = b''
44 self._cached_chunk = b''
45 self._cached_chunk_position = 0 # Offset from the start of the file
45 self._cached_chunk_position = 0 # Offset from the start of the file
46 if initial_cache:
46 if initial_cache:
47 self._cached_chunk_position, self._cached_chunk = initial_cache
47 self._cached_chunk_position, self._cached_chunk = initial_cache
48
48
49 def clear_cache(self):
49 def clear_cache(self):
50 self._cached_chunk = b''
50 self._cached_chunk = b''
51 self._cached_chunk_position = 0
51 self._cached_chunk_position = 0
52
52
53 @property
54 def is_open(self):
55 """True if any file handle is being held
56
57 Used for asserts and debugging in the Python code"""
58 return (
59 self.reading_handle is not None or self.writing_handle is not None
60 )
61
53 def _open(self, mode=b'r'):
62 def _open(self, mode=b'r'):
54 """Return a file object"""
63 """Return a file object"""
55 return self.opener(self.filename, mode=mode)
64 return self.opener(self.filename, mode=mode)
56
65
57 @contextlib.contextmanager
66 @contextlib.contextmanager
58 def _read_handle(self):
67 def _read_handle(self):
59 """File object suitable for reading data"""
68 """File object suitable for reading data"""
60 # Use a file handle being actively used for writes, if available.
69 # Use a file handle being actively used for writes, if available.
61 # There is some danger to doing this because reads will seek the
70 # There is some danger to doing this because reads will seek the
62 # file. However, revlog._writeentry performs a SEEK_END before all
71 # file. However, revlog._writeentry performs a SEEK_END before all
63 # writes, so we should be safe.
72 # writes, so we should be safe.
64 if self.writing_handle:
73 if self.writing_handle:
65 yield self.writing_handle
74 yield self.writing_handle
66
75
67 elif self.reading_handle:
76 elif self.reading_handle:
68 yield self.reading_handle
77 yield self.reading_handle
69
78
70 # Otherwise open a new file handle.
79 # Otherwise open a new file handle.
71 else:
80 else:
72 with self._open() as fp:
81 with self._open() as fp:
73 yield fp
82 yield fp
74
83
75 @contextlib.contextmanager
84 @contextlib.contextmanager
76 def reading(self):
85 def reading(self):
77 """Context manager that keeps the file open for reading"""
86 """Context manager that keeps the file open for reading"""
78 if (
87 if (
79 self.reading_handle is None
88 self.reading_handle is None
80 and self.writing_handle is None
89 and self.writing_handle is None
81 and self.filename is not None
90 and self.filename is not None
82 ):
91 ):
83 with self._open() as fp:
92 with self._open() as fp:
84 self.reading_handle = fp
93 self.reading_handle = fp
85 try:
94 try:
86 yield
95 yield
87 finally:
96 finally:
88 self.reading_handle = None
97 self.reading_handle = None
89 else:
98 else:
90 yield
99 yield
91
100
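Holding the reading() context keeps one handle open across many reads instead of reopening the file for each call. A hedged usage sketch, where `raf` is an assumed randomaccessfile instance:

# Hedged sketch: one open handle serves every read inside the loop.
with raf.reading():
    for off in range(0, 32768, 4096):
        raf.read_chunk(off, 4096)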
92 def read_chunk(self, offset, length):
101 def read_chunk(self, offset, length):
93 """Read a chunk of bytes from the file.
102 """Read a chunk of bytes from the file.
94
103
95 Accepts an absolute offset and a length to read. Data is read
104 Accepts an absolute offset and a length to read. Data is read
96 through any file handle already open for writing or reading.
105 through any file handle already open for writing or reading.
97
106
98 If such a handle is reused, it will be seeked and its original
107 If such a handle is reused, it will be seeked and its original
99 position will NOT be restored.
108 position will NOT be restored.
100
109
101 Returns bytes or a buffer of raw byte data.
110 Returns bytes or a buffer of raw byte data.
102
111
103 Raises if the requested number of bytes could not be read.
112 Raises if the requested number of bytes could not be read.
104 """
113 """
105 end = offset + length
114 end = offset + length
106 cache_start = self._cached_chunk_position
115 cache_start = self._cached_chunk_position
107 cache_end = cache_start + len(self._cached_chunk)
116 cache_end = cache_start + len(self._cached_chunk)
108 # Is the requested chunk within the cache?
117 # Is the requested chunk within the cache?
109 if cache_start <= offset and end <= cache_end:
118 if cache_start <= offset and end <= cache_end:
110 if cache_start == offset and end == cache_end:
119 if cache_start == offset and end == cache_end:
111 return self._cached_chunk # avoid a copy
120 return self._cached_chunk # avoid a copy
112 relative_start = offset - cache_start
121 relative_start = offset - cache_start
113 return util.buffer(self._cached_chunk, relative_start, length)
122 return util.buffer(self._cached_chunk, relative_start, length)
114
123
115 return self._read_and_update_cache(offset, length)
124 return self._read_and_update_cache(offset, length)
116
125
117 def _read_and_update_cache(self, offset, length):
126 def _read_and_update_cache(self, offset, length):
118 # Cache data both forward and backward around the requested
127 # Cache data both forward and backward around the requested
119 # data, in a fixed size window. This helps speed up operations
128 # data, in a fixed size window. This helps speed up operations
120 # involving reading the revlog backwards.
129 # involving reading the revlog backwards.
121 real_offset = offset & ~(self.default_cached_chunk_size - 1)
130 real_offset = offset & ~(self.default_cached_chunk_size - 1)
122 real_length = (
131 real_length = (
123 (offset + length + self.default_cached_chunk_size)
132 (offset + length + self.default_cached_chunk_size)
124 & ~(self.default_cached_chunk_size - 1)
133 & ~(self.default_cached_chunk_size - 1)
125 ) - real_offset
134 ) - real_offset
126 with self._read_handle() as file_obj:
135 with self._read_handle() as file_obj:
127 file_obj.seek(real_offset)
136 file_obj.seek(real_offset)
128 data = file_obj.read(real_length)
137 data = file_obj.read(real_length)
129
138
130 self._add_cached_chunk(real_offset, data)
139 self._add_cached_chunk(real_offset, data)
131
140
132 relative_offset = offset - real_offset
141 relative_offset = offset - real_offset
133 got = len(data) - relative_offset
142 got = len(data) - relative_offset
134 if got < length:
143 if got < length:
135 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
144 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
136 raise error.RevlogError(message)
145 raise error.RevlogError(message)
137
146
138 if offset != real_offset or real_length != length:
147 if offset != real_offset or real_length != length:
139 return util.buffer(data, relative_offset, length)
148 return util.buffer(data, relative_offset, length)
140 return data
149 return data
141
150
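The masking arithmetic rounds the read window down and up to multiples of the cache chunk size, which is why __init__ asserts that default_cached_chunk_size is a power of two. A worked example, assuming a 64 KiB default_cached_chunk_size:

chunk = 65536                                         # assumed chunk size
offset, length = 70000, 100
real_offset = offset & ~(chunk - 1)                   # 65536 (round down)
real_end = (offset + length + chunk) & ~(chunk - 1)   # 131072 (round up)
real_length = real_end - real_offset                  # 65536
# The cached window [65536, 131072) fully covers the request [70000, 70100).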
142 def _add_cached_chunk(self, offset, data):
151 def _add_cached_chunk(self, offset, data):
143 """Add to or replace the cached data chunk.
152 """Add to or replace the cached data chunk.
144
153
145 Accepts an absolute offset and the data that is at that location.
154 Accepts an absolute offset and the data that is at that location.
146 """
155 """
147 if (
156 if (
148 self._cached_chunk_position + len(self._cached_chunk) == offset
157 self._cached_chunk_position + len(self._cached_chunk) == offset
149 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
158 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
150 ):
159 ):
151 # add to existing cache
160 # add to existing cache
152 self._cached_chunk += data
161 self._cached_chunk += data
153 else:
162 else:
154 self._cached_chunk = data
163 self._cached_chunk = data
155 self._cached_chunk_position = offset
164 self._cached_chunk_position = offset
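Putting the pieces together, a hedged end-to-end sketch; `opener` is an assumed Mercurial vfs, and the filename is illustrative. The first read populates a cache window and the second is served from it without touching the file.

# Hedged sketch: adjacent reads hit the cache populated by the first read.
raf = randomaccessfile(opener, b'data/foo.i', 65536)
first = raf.read_chunk(0, 4096)      # reads and caches a 64 KiB window
second = raf.read_chunk(4096, 4096)  # served from self._cached_chunk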