revlog: move the `trypending` logic from the `changelog` to the `revlog`...
marmoute
r48014:4f38ada3 default
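The change inverts ownership of the pending-index check. Previously the changelog probed for `00changelog.i.a` itself and passed a `postfix` down to the revlog constructor; now it simply forwards a `trypending` flag and lets the revlog decide which index file to open. Below is a minimal sketch of the consuming side, assuming the revlog resolves the file name while loading its index (that code is outside the hunks shown here, and the helper name is hypothetical):

    # Hypothetical helper; upstream does the equivalent inside
    # revlog._loadindex(), which the truncated hunk below does not show.
    def _pick_index_file(radix, postfix, trypending, opener):
        if postfix is not None:
            return b'%s.i.%s' % (radix, postfix)
        if trypending and opener.exists(b'%s.i.a' % radix):
            # an unfinalized transaction wrote a pending index
            return b'%s.i.a' % radix
        return b'%s.i' % radix

This mirrors the branch deleted from `changelog.__init__` in the first hunk, generalized over the revlog's `radix` instead of hardcoding `00changelog`.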
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,630 +1,625 @@
 # changelog.py - changelog class for mercurial
 #
 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 from __future__ import absolute_import
 
 from .i18n import _
 from .node import (
     bin,
     hex,
 )
 from .thirdparty import attr
 
 from . import (
     encoding,
     error,
     metadata,
     pycompat,
     revlog,
 )
 from .utils import (
     dateutil,
     stringutil,
 )
 from .revlogutils import (
     constants as revlog_constants,
     flagutil,
 )
 
 _defaultextra = {b'branch': b'default'}
 
 
 def _string_escape(text):
     """
     >>> from .pycompat import bytechr as chr
     >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
     >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
     >>> s
     'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
     >>> res = _string_escape(s)
     >>> s == _string_unescape(res)
     True
     """
     # subset of the string_escape codec
     text = (
         text.replace(b'\\', b'\\\\')
         .replace(b'\n', b'\\n')
         .replace(b'\r', b'\\r')
     )
     return text.replace(b'\0', b'\\0')
 
 
 def _string_unescape(text):
     if b'\\0' in text:
         # fix up \0 without getting into trouble with \\0
         text = text.replace(b'\\\\', b'\\\\\n')
         text = text.replace(b'\\0', b'\0')
         text = text.replace(b'\n', b'')
     return stringutil.unescapestr(text)
 
 
 def decodeextra(text):
     """
     >>> from .pycompat import bytechr as chr
     >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
     ...                    ).items())
     [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
     >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
     ...                                 b'baz': chr(92) + chr(0) + b'2'})
     ...                    ).items())
     [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
     """
     extra = _defaultextra.copy()
     for l in text.split(b'\0'):
         if l:
             k, v = _string_unescape(l).split(b':', 1)
             extra[k] = v
     return extra
 
 
 def encodeextra(d):
     # keys must be sorted to produce a deterministic changelog entry
     items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
     return b"\0".join(items)
 
 
 def stripdesc(desc):
     """strip trailing whitespace and leading and trailing empty lines"""
     return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
 
 
 class appender(object):
     """the changelog index must be updated last on disk, so we use this class
     to delay writes to it"""
 
     def __init__(self, vfs, name, mode, buf):
         self.data = buf
         fp = vfs(name, mode)
         self.fp = fp
         self.offset = fp.tell()
         self.size = vfs.fstat(fp).st_size
         self._end = self.size
 
     def end(self):
         return self._end
 
     def tell(self):
         return self.offset
 
     def flush(self):
         pass
 
     @property
     def closed(self):
         return self.fp.closed
 
     def close(self):
         self.fp.close()
 
     def seek(self, offset, whence=0):
         '''virtual file offset spans real file and data'''
         if whence == 0:
             self.offset = offset
         elif whence == 1:
             self.offset += offset
         elif whence == 2:
             self.offset = self.end() + offset
         if self.offset < self.size:
             self.fp.seek(self.offset)
 
     def read(self, count=-1):
         '''only trick here is reads that span real file and data'''
         ret = b""
         if self.offset < self.size:
             s = self.fp.read(count)
             ret = s
             self.offset += len(s)
             if count > 0:
                 count -= len(s)
         if count != 0:
             doff = self.offset - self.size
             self.data.insert(0, b"".join(self.data))
             del self.data[1:]
             s = self.data[0][doff : doff + count]
             self.offset += len(s)
             ret += s
         return ret
 
     def write(self, s):
         self.data.append(bytes(s))
         self.offset += len(s)
         self._end += len(s)
 
     def __enter__(self):
         self.fp.__enter__()
         return self
 
     def __exit__(self, *args):
         return self.fp.__exit__(*args)
 
 
 class _divertopener(object):
     def __init__(self, opener, target):
         self._opener = opener
         self._target = target
 
     def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
         if name != self._target:
             return self._opener(name, mode, **kwargs)
         return self._opener(name + b".a", mode, **kwargs)
 
     def __getattr__(self, attr):
         return getattr(self._opener, attr)
 
 
 def _delayopener(opener, target, buf):
     """build an opener that stores chunks in 'buf' instead of 'target'"""
 
     def _delay(name, mode=b'r', checkambig=False, **kwargs):
         if name != target:
             return opener(name, mode, **kwargs)
         assert not kwargs
         return appender(opener, name, mode, buf)
 
     return _delay
 
 
 @attr.s
 class _changelogrevision(object):
     # Extensions might modify _defaultextra, so let the constructor below pass
     # it in
     extra = attr.ib()
     manifest = attr.ib()
     user = attr.ib(default=b'')
     date = attr.ib(default=(0, 0))
     files = attr.ib(default=attr.Factory(list))
     filesadded = attr.ib(default=None)
     filesremoved = attr.ib(default=None)
     p1copies = attr.ib(default=None)
     p2copies = attr.ib(default=None)
     description = attr.ib(default=b'')
     branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
 
 
 class changelogrevision(object):
     """Holds results of a parsed changelog revision.
 
     Changelog revisions consist of multiple pieces of data, including
     the manifest node, user, and date. This object exposes a view into
     the parsed object.
     """
 
     __slots__ = (
         '_offsets',
         '_text',
         '_sidedata',
         '_cpsd',
         '_changes',
     )
 
     def __new__(cls, cl, text, sidedata, cpsd):
         if not text:
             return _changelogrevision(extra=_defaultextra, manifest=cl.nullid)
 
         self = super(changelogrevision, cls).__new__(cls)
         # We could return here and implement the following as an __init__.
         # But doing it here is equivalent and saves an extra function call.
 
         # format used:
         # nodeid\n        : manifest node in ascii
         # user\n          : user, no \n or \r allowed
         # time tz extra\n : date (time is int or float, timezone is int)
         #                 : extra is metadata, encoded and separated by '\0'
         #                 : older versions ignore it
         # files\n\n       : files modified by the cset, no \n or \r allowed
         # (.*)            : comment (free text, ideally utf-8)
         #
         # changelog v0 doesn't use extra
 
         nl1 = text.index(b'\n')
         nl2 = text.index(b'\n', nl1 + 1)
         nl3 = text.index(b'\n', nl2 + 1)
 
         # The list of files may be empty. Which means nl3 is the first of the
         # double newline that precedes the description.
         if text[nl3 + 1 : nl3 + 2] == b'\n':
             doublenl = nl3
         else:
             doublenl = text.index(b'\n\n', nl3 + 1)
 
         self._offsets = (nl1, nl2, nl3, doublenl)
         self._text = text
         self._sidedata = sidedata
         self._cpsd = cpsd
         self._changes = None
 
         return self
 
     @property
     def manifest(self):
         return bin(self._text[0 : self._offsets[0]])
 
     @property
     def user(self):
         off = self._offsets
         return encoding.tolocal(self._text[off[0] + 1 : off[1]])
 
     @property
     def _rawdate(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1 : off[2]]
         return dateextra.split(b' ', 2)[0:2]
 
     @property
     def _rawextra(self):
         off = self._offsets
         dateextra = self._text[off[1] + 1 : off[2]]
         fields = dateextra.split(b' ', 2)
         if len(fields) != 3:
             return None
 
         return fields[2]
 
     @property
     def date(self):
         raw = self._rawdate
         time = float(raw[0])
         # Various tools did silly things with the timezone.
         try:
             timezone = int(raw[1])
         except ValueError:
             timezone = 0
 
         return time, timezone
 
     @property
     def extra(self):
         raw = self._rawextra
         if raw is None:
             return _defaultextra
 
         return decodeextra(raw)
 
     @property
     def changes(self):
         if self._changes is not None:
             return self._changes
         if self._cpsd:
             changes = metadata.decode_files_sidedata(self._sidedata)
         else:
             changes = metadata.ChangingFiles(
                 touched=self.files or (),
                 added=self.filesadded or (),
                 removed=self.filesremoved or (),
                 p1_copies=self.p1copies or {},
                 p2_copies=self.p2copies or {},
             )
         self._changes = changes
         return changes
 
     @property
     def files(self):
         if self._cpsd:
             return sorted(self.changes.touched)
         off = self._offsets
         if off[2] == off[3]:
             return []
 
         return self._text[off[2] + 1 : off[3]].split(b'\n')
 
     @property
     def filesadded(self):
         if self._cpsd:
             return self.changes.added
         else:
             rawindices = self.extra.get(b'filesadded')
             if rawindices is None:
                 return None
         return metadata.decodefileindices(self.files, rawindices)
 
     @property
     def filesremoved(self):
         if self._cpsd:
             return self.changes.removed
         else:
             rawindices = self.extra.get(b'filesremoved')
             if rawindices is None:
                 return None
         return metadata.decodefileindices(self.files, rawindices)
 
     @property
     def p1copies(self):
         if self._cpsd:
             return self.changes.copied_from_p1
         else:
             rawcopies = self.extra.get(b'p1copies')
             if rawcopies is None:
                 return None
         return metadata.decodecopies(self.files, rawcopies)
 
     @property
     def p2copies(self):
         if self._cpsd:
             return self.changes.copied_from_p2
         else:
             rawcopies = self.extra.get(b'p2copies')
             if rawcopies is None:
                 return None
         return metadata.decodecopies(self.files, rawcopies)
 
     @property
     def description(self):
         return encoding.tolocal(self._text[self._offsets[3] + 2 :])
 
     @property
     def branchinfo(self):
         extra = self.extra
         return encoding.tolocal(extra.get(b"branch")), b'close' in extra
 
 
 class changelog(revlog.revlog):
     def __init__(self, opener, trypending=False, concurrencychecker=None):
         """Load a changelog revlog using an opener.
 
         If ``trypending`` is true, we attempt to load the index from a
         ``00changelog.i.a`` file instead of the default ``00changelog.i``.
         The ``00changelog.i.a`` file contains index (and possibly inline
         revision) data for a transaction that hasn't been finalized yet.
         It exists in a separate file to facilitate readers (such as
         hook processes) accessing data before a transaction is finalized.
 
         ``concurrencychecker`` will be passed to the revlog init function, see
         the documentation there.
         """
 
-        if trypending and opener.exists(b'00changelog.i.a'):
-            postfix = b'a'
-        else:
-            postfix = None
-
         revlog.revlog.__init__(
             self,
             opener,
             target=(revlog_constants.KIND_CHANGELOG, None),
             radix=b'00changelog',
-            postfix=postfix,
             checkambig=True,
             mmaplargeindex=True,
             persistentnodemap=opener.options.get(b'persistent-nodemap', False),
             concurrencychecker=concurrencychecker,
+            trypending=trypending,
         )
 
         if self._initempty and (self._format_version == revlog.REVLOGV1):
             # changelogs don't benefit from generaldelta.
 
             self._format_flags &= ~revlog.FLAG_GENERALDELTA
             self._generaldelta = False
 
         # Delta chains for changelogs tend to be very small because entries
         # tend to be small and don't delta well with each other. So disable
         # delta chains.
         self._storedeltachains = False
 
         self._realopener = opener
         self._delayed = False
         self._delaybuf = None
         self._divert = False
         self._filteredrevs = frozenset()
         self._filteredrevs_hashcache = {}
         self._copiesstorage = opener.options.get(b'copies-storage')
 
     @property
     def filteredrevs(self):
         return self._filteredrevs
 
     @filteredrevs.setter
     def filteredrevs(self, val):
         # Ensure all updates go through this function
         assert isinstance(val, frozenset)
         self._filteredrevs = val
         self._filteredrevs_hashcache = {}
 
     def _write_docket(self, tr):
         if not self._delayed:
             super(changelog, self)._write_docket(tr)
 
     def delayupdate(self, tr):
         """delay visibility of index updates to other readers"""
         if self._docket is None and not self._delayed:
             if len(self) == 0:
                 self._divert = True
                 if self._realopener.exists(self._indexfile + b'.a'):
                     self._realopener.unlink(self._indexfile + b'.a')
                 self.opener = _divertopener(self._realopener, self._indexfile)
             else:
                 self._delaybuf = []
                 self.opener = _delayopener(
                     self._realopener, self._indexfile, self._delaybuf
                 )
         self._delayed = True
         tr.addpending(b'cl-%i' % id(self), self._writepending)
         tr.addfinalize(b'cl-%i' % id(self), self._finalize)
 
     def _finalize(self, tr):
         """finalize index updates"""
         self._delayed = False
         self.opener = self._realopener
         # move redirected index data back into place
         if self._docket is not None:
             self._write_docket(tr)
         elif self._divert:
             assert not self._delaybuf
             tmpname = self._indexfile + b".a"
             nfile = self.opener.open(tmpname)
             nfile.close()
             self.opener.rename(tmpname, self._indexfile, checkambig=True)
         elif self._delaybuf:
             fp = self.opener(self._indexfile, b'a', checkambig=True)
             fp.write(b"".join(self._delaybuf))
             fp.close()
         self._delaybuf = None
         self._divert = False
         # split when we're done
         self._enforceinlinesize(tr)
 
     def _writepending(self, tr):
         """create a file containing the unfinalized state for
         pretxnchangegroup"""
         if self._delaybuf:
             # make a temporary copy of the index
             fp1 = self._realopener(self._indexfile)
             pendingfilename = self._indexfile + b".a"
             # register as a temp file to ensure cleanup on failure
             tr.registertmp(pendingfilename)
             # write existing data
             fp2 = self._realopener(pendingfilename, b"w")
             fp2.write(fp1.read())
             # add pending data
             fp2.write(b"".join(self._delaybuf))
             fp2.close()
             # switch modes so finalize can simply rename
             self._delaybuf = None
             self._divert = True
             self.opener = _divertopener(self._realopener, self._indexfile)
 
         if self._divert:
             return True
 
         return False
 
     def _enforceinlinesize(self, tr):
         if not self._delayed:
             revlog.revlog._enforceinlinesize(self, tr)
 
     def read(self, nodeorrev):
         """Obtain data from a parsed changelog revision.
 
         Returns a 6-tuple of:
 
            - manifest node in binary
            - author/user as a localstr
            - date as a 2-tuple of (time, timezone)
            - list of files
            - commit message as a localstr
            - dict of extra metadata
 
         Unless you need to access all fields, consider calling
         ``changelogrevision`` instead, as it is faster for partial object
         access.
         """
         d, s = self._revisiondata(nodeorrev)
         c = changelogrevision(
             self, d, s, self._copiesstorage == b'changeset-sidedata'
         )
         return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
 
     def changelogrevision(self, nodeorrev):
         """Obtain a ``changelogrevision`` for a node or revision."""
         text, sidedata = self._revisiondata(nodeorrev)
         return changelogrevision(
             self, text, sidedata, self._copiesstorage == b'changeset-sidedata'
         )
 
     def readfiles(self, nodeorrev):
         """
         short version of read that only returns the files modified by the cset
         """
         text = self.revision(nodeorrev)
         if not text:
             return []
         last = text.index(b"\n\n")
         l = text[:last].split(b'\n')
         return l[3:]
 
     def add(
         self,
         manifest,
         files,
         desc,
         transaction,
         p1,
         p2,
         user,
         date=None,
         extra=None,
     ):
         # Convert to UTF-8 encoded bytestrings as the very first
         # thing: calling any method on a localstr object will turn it
         # into a str object and the cached UTF-8 string is thus lost.
         user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
 
         user = user.strip()
         # An empty username or a username with a "\n" will make the
         # revision text contain two "\n\n" sequences -> corrupt
         # repository since read cannot unpack the revision.
         if not user:
             raise error.StorageError(_(b"empty username"))
         if b"\n" in user:
             raise error.StorageError(
                 _(b"username %r contains a newline") % pycompat.bytestr(user)
             )
 
         desc = stripdesc(desc)
 
         if date:
             parseddate = b"%d %d" % dateutil.parsedate(date)
         else:
             parseddate = b"%d %d" % dateutil.makedate()
         if extra:
             branch = extra.get(b"branch")
             if branch in (b"default", b""):
                 del extra[b"branch"]
             elif branch in (b".", b"null", b"tip"):
                 raise error.StorageError(
                     _(b'the name \'%s\' is reserved') % branch
                 )
         sortedfiles = sorted(files.touched)
         flags = 0
         sidedata = None
         if self._copiesstorage == b'changeset-sidedata':
             if files.has_copies_info:
                 flags |= flagutil.REVIDX_HASCOPIESINFO
             sidedata = metadata.encode_files_sidedata(files)
 
         if extra:
             extra = encodeextra(extra)
             parseddate = b"%s %s" % (parseddate, extra)
         l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
         text = b"\n".join(l)
         rev = self.addrevision(
             text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
         )
         return self.node(rev)
 
     def branchinfo(self, rev):
         """return the branch name and open/close state of a revision
 
         This function exists because creating a changectx object
         just to access this is costly."""
         return self.changelogrevision(rev).branchinfo
 
     def _nodeduplicatecallback(self, transaction, rev):
         # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
         #
         # We track them in a list to preserve their order from the source bundle
         duplicates = transaction.changes.setdefault(b'revduplicates', [])
         duplicates.append(rev)
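As background for the delayed-write machinery that produces `00changelog.i.a` in the first place: `appender` presents the on-disk index plus an in-memory buffer of pending writes as a single virtual file, so concurrent readers of `00changelog.i` never observe a half-written transaction. A self-contained toy sketch of that buffering idea (stand-in class and names, not the upstream API):

    import io

    class toyappender(object):
        """Toy stand-in for `appender`: reads below the on-disk size hit
        the real file; writes only ever land in an in-memory buffer."""

        def __init__(self, real_bytes):
            self.fp = io.BytesIO(real_bytes)  # pre-existing "on-disk" data
            self.size = len(real_bytes)       # size at open time
            self.data = []                    # delayed writes accumulate here
            self.offset = self.size

        def write(self, s):
            self.data.append(bytes(s))        # never touches the real file
            self.offset += len(s)

        def virtual_contents(self):
            # what a reader of the combined (real + pending) stream sees
            return self.fp.getvalue() + b"".join(self.data)

    a = toyappender(b"existing-index-entries")
    a.write(b"|pending-entry")
    assert a.virtual_contents() == b"existing-index-entries|pending-entry"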
@@ -1,3246 +1,3254
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import io
20 import io
21 import os
21 import os
22 import struct
22 import struct
23 import zlib
23 import zlib
24
24
25 # import stuff from node for others to import from revlog
25 # import stuff from node for others to import from revlog
26 from .node import (
26 from .node import (
27 bin,
27 bin,
28 hex,
28 hex,
29 nullrev,
29 nullrev,
30 sha1nodeconstants,
30 sha1nodeconstants,
31 short,
31 short,
32 wdirrev,
32 wdirrev,
33 )
33 )
34 from .i18n import _
34 from .i18n import _
35 from .pycompat import getattr
35 from .pycompat import getattr
36 from .revlogutils.constants import (
36 from .revlogutils.constants import (
37 ALL_KINDS,
37 ALL_KINDS,
38 FEATURES_BY_VERSION,
38 FEATURES_BY_VERSION,
39 FLAG_GENERALDELTA,
39 FLAG_GENERALDELTA,
40 FLAG_INLINE_DATA,
40 FLAG_INLINE_DATA,
41 INDEX_HEADER,
41 INDEX_HEADER,
42 REVLOGV0,
42 REVLOGV0,
43 REVLOGV1,
43 REVLOGV1,
44 REVLOGV1_FLAGS,
44 REVLOGV1_FLAGS,
45 REVLOGV2,
45 REVLOGV2,
46 REVLOGV2_FLAGS,
46 REVLOGV2_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
47 REVLOG_DEFAULT_FLAGS,
48 REVLOG_DEFAULT_FORMAT,
48 REVLOG_DEFAULT_FORMAT,
49 REVLOG_DEFAULT_VERSION,
49 REVLOG_DEFAULT_VERSION,
50 SUPPORTED_FLAGS,
50 SUPPORTED_FLAGS,
51 )
51 )
52 from .revlogutils.flagutil import (
52 from .revlogutils.flagutil import (
53 REVIDX_DEFAULT_FLAGS,
53 REVIDX_DEFAULT_FLAGS,
54 REVIDX_ELLIPSIS,
54 REVIDX_ELLIPSIS,
55 REVIDX_EXTSTORED,
55 REVIDX_EXTSTORED,
56 REVIDX_FLAGS_ORDER,
56 REVIDX_FLAGS_ORDER,
57 REVIDX_HASCOPIESINFO,
57 REVIDX_HASCOPIESINFO,
58 REVIDX_ISCENSORED,
58 REVIDX_ISCENSORED,
59 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 REVIDX_RAWTEXT_CHANGING_FLAGS,
60 )
60 )
61 from .thirdparty import attr
61 from .thirdparty import attr
62 from . import (
62 from . import (
63 ancestor,
63 ancestor,
64 dagop,
64 dagop,
65 error,
65 error,
66 mdiff,
66 mdiff,
67 policy,
67 policy,
68 pycompat,
68 pycompat,
69 templatefilters,
69 templatefilters,
70 util,
70 util,
71 )
71 )
72 from .interfaces import (
72 from .interfaces import (
73 repository,
73 repository,
74 util as interfaceutil,
74 util as interfaceutil,
75 )
75 )
76 from .revlogutils import (
76 from .revlogutils import (
77 deltas as deltautil,
77 deltas as deltautil,
78 docket as docketutil,
78 docket as docketutil,
79 flagutil,
79 flagutil,
80 nodemap as nodemaputil,
80 nodemap as nodemaputil,
81 revlogv0,
81 revlogv0,
82 sidedata as sidedatautil,
82 sidedata as sidedatautil,
83 )
83 )
84 from .utils import (
84 from .utils import (
85 storageutil,
85 storageutil,
86 stringutil,
86 stringutil,
87 )
87 )
88
88
89 # blanked usage of all the name to prevent pyflakes constraints
89 # blanked usage of all the name to prevent pyflakes constraints
90 # We need these name available in the module for extensions.
90 # We need these name available in the module for extensions.
91
91
92 REVLOGV0
92 REVLOGV0
93 REVLOGV1
93 REVLOGV1
94 REVLOGV2
94 REVLOGV2
95 FLAG_INLINE_DATA
95 FLAG_INLINE_DATA
96 FLAG_GENERALDELTA
96 FLAG_GENERALDELTA
97 REVLOG_DEFAULT_FLAGS
97 REVLOG_DEFAULT_FLAGS
98 REVLOG_DEFAULT_FORMAT
98 REVLOG_DEFAULT_FORMAT
99 REVLOG_DEFAULT_VERSION
99 REVLOG_DEFAULT_VERSION
100 REVLOGV1_FLAGS
100 REVLOGV1_FLAGS
101 REVLOGV2_FLAGS
101 REVLOGV2_FLAGS
102 REVIDX_ISCENSORED
102 REVIDX_ISCENSORED
103 REVIDX_ELLIPSIS
103 REVIDX_ELLIPSIS
104 REVIDX_HASCOPIESINFO
104 REVIDX_HASCOPIESINFO
105 REVIDX_EXTSTORED
105 REVIDX_EXTSTORED
106 REVIDX_DEFAULT_FLAGS
106 REVIDX_DEFAULT_FLAGS
107 REVIDX_FLAGS_ORDER
107 REVIDX_FLAGS_ORDER
108 REVIDX_RAWTEXT_CHANGING_FLAGS
108 REVIDX_RAWTEXT_CHANGING_FLAGS
109
109
110 parsers = policy.importmod('parsers')
110 parsers = policy.importmod('parsers')
111 rustancestor = policy.importrust('ancestor')
111 rustancestor = policy.importrust('ancestor')
112 rustdagop = policy.importrust('dagop')
112 rustdagop = policy.importrust('dagop')
113 rustrevlog = policy.importrust('revlog')
113 rustrevlog = policy.importrust('revlog')
114
114
115 # Aliased for performance.
115 # Aliased for performance.
116 _zlibdecompress = zlib.decompress
116 _zlibdecompress = zlib.decompress
117
117
118 # max size of revlog with inline data
118 # max size of revlog with inline data
119 _maxinline = 131072
119 _maxinline = 131072
120 _chunksize = 1048576
120 _chunksize = 1048576
121
121
122 # Flag processors for REVIDX_ELLIPSIS.
122 # Flag processors for REVIDX_ELLIPSIS.
123 def ellipsisreadprocessor(rl, text):
123 def ellipsisreadprocessor(rl, text):
124 return text, False
124 return text, False
125
125
126
126
127 def ellipsiswriteprocessor(rl, text):
127 def ellipsiswriteprocessor(rl, text):
128 return text, False
128 return text, False
129
129
130
130
131 def ellipsisrawprocessor(rl, text):
131 def ellipsisrawprocessor(rl, text):
132 return False
132 return False
133
133
134
134
135 ellipsisprocessor = (
135 ellipsisprocessor = (
136 ellipsisreadprocessor,
136 ellipsisreadprocessor,
137 ellipsiswriteprocessor,
137 ellipsiswriteprocessor,
138 ellipsisrawprocessor,
138 ellipsisrawprocessor,
139 )
139 )
140
140
141
141
142 def offset_type(offset, type):
142 def offset_type(offset, type):
143 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
143 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
144 raise ValueError(b'unknown revlog index flags')
144 raise ValueError(b'unknown revlog index flags')
145 return int(int(offset) << 16 | type)
145 return int(int(offset) << 16 | type)
146
146
147
147
148 def _verify_revision(rl, skipflags, state, node):
148 def _verify_revision(rl, skipflags, state, node):
149 """Verify the integrity of the given revlog ``node`` while providing a hook
149 """Verify the integrity of the given revlog ``node`` while providing a hook
150 point for extensions to influence the operation."""
150 point for extensions to influence the operation."""
151 if skipflags:
151 if skipflags:
152 state[b'skipread'].add(node)
152 state[b'skipread'].add(node)
153 else:
153 else:
154 # Side-effect: read content and verify hash.
154 # Side-effect: read content and verify hash.
155 rl.revision(node)
155 rl.revision(node)
156
156
157
157
158 # True if a fast implementation for persistent-nodemap is available
158 # True if a fast implementation for persistent-nodemap is available
159 #
159 #
160 # We also consider we have a "fast" implementation in "pure" python because
160 # We also consider we have a "fast" implementation in "pure" python because
161 # people using pure don't really have performance consideration (and a
161 # people using pure don't really have performance consideration (and a
162 # wheelbarrow of other slowness source)
162 # wheelbarrow of other slowness source)
163 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
163 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
164 parsers, 'BaseIndexObject'
164 parsers, 'BaseIndexObject'
165 )
165 )
166
166
167
167
168 @attr.s(slots=True, frozen=True)
168 @attr.s(slots=True, frozen=True)
169 class _revisioninfo(object):
169 class _revisioninfo(object):
170 """Information about a revision that allows building its fulltext
170 """Information about a revision that allows building its fulltext
171 node: expected hash of the revision
171 node: expected hash of the revision
172 p1, p2: parent revs of the revision
172 p1, p2: parent revs of the revision
173 btext: built text cache consisting of a one-element list
173 btext: built text cache consisting of a one-element list
174 cachedelta: (baserev, uncompressed_delta) or None
174 cachedelta: (baserev, uncompressed_delta) or None
175 flags: flags associated to the revision storage
175 flags: flags associated to the revision storage
176
176
177 One of btext[0] or cachedelta must be set.
177 One of btext[0] or cachedelta must be set.
178 """
178 """
179
179
180 node = attr.ib()
180 node = attr.ib()
181 p1 = attr.ib()
181 p1 = attr.ib()
182 p2 = attr.ib()
182 p2 = attr.ib()
183 btext = attr.ib()
183 btext = attr.ib()
184 textlen = attr.ib()
184 textlen = attr.ib()
185 cachedelta = attr.ib()
185 cachedelta = attr.ib()
186 flags = attr.ib()
186 flags = attr.ib()
187
187
188
188
189 @interfaceutil.implementer(repository.irevisiondelta)
189 @interfaceutil.implementer(repository.irevisiondelta)
190 @attr.s(slots=True)
190 @attr.s(slots=True)
191 class revlogrevisiondelta(object):
191 class revlogrevisiondelta(object):
192 node = attr.ib()
192 node = attr.ib()
193 p1node = attr.ib()
193 p1node = attr.ib()
194 p2node = attr.ib()
194 p2node = attr.ib()
195 basenode = attr.ib()
195 basenode = attr.ib()
196 flags = attr.ib()
196 flags = attr.ib()
197 baserevisionsize = attr.ib()
197 baserevisionsize = attr.ib()
198 revision = attr.ib()
198 revision = attr.ib()
199 delta = attr.ib()
199 delta = attr.ib()
200 sidedata = attr.ib()
200 sidedata = attr.ib()
201 protocol_flags = attr.ib()
201 protocol_flags = attr.ib()
202 linknode = attr.ib(default=None)
202 linknode = attr.ib(default=None)
203
203
204
204
205 @interfaceutil.implementer(repository.iverifyproblem)
205 @interfaceutil.implementer(repository.iverifyproblem)
206 @attr.s(frozen=True)
206 @attr.s(frozen=True)
207 class revlogproblem(object):
207 class revlogproblem(object):
208 warning = attr.ib(default=None)
208 warning = attr.ib(default=None)
209 error = attr.ib(default=None)
209 error = attr.ib(default=None)
210 node = attr.ib(default=None)
210 node = attr.ib(default=None)
211
211
212
212
213 def parse_index_v1(data, inline):
213 def parse_index_v1(data, inline):
214 # call the C implementation to parse the index data
214 # call the C implementation to parse the index data
215 index, cache = parsers.parse_index2(data, inline)
215 index, cache = parsers.parse_index2(data, inline)
216 return index, cache
216 return index, cache
217
217
218
218
219 def parse_index_v2(data, inline):
219 def parse_index_v2(data, inline):
220 # call the C implementation to parse the index data
220 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
221 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
222 return index, cache
222 return index, cache
223
223
224
224
225 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
225 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
226
226
227 def parse_index_v1_nodemap(data, inline):
227 def parse_index_v1_nodemap(data, inline):
228 index, cache = parsers.parse_index_devel_nodemap(data, inline)
228 index, cache = parsers.parse_index_devel_nodemap(data, inline)
229 return index, cache
229 return index, cache
230
230
231
231
232 else:
232 else:
233 parse_index_v1_nodemap = None
233 parse_index_v1_nodemap = None
234
234
235
235
236 def parse_index_v1_mixed(data, inline):
236 def parse_index_v1_mixed(data, inline):
237 index, cache = parse_index_v1(data, inline)
237 index, cache = parse_index_v1(data, inline)
238 return rustrevlog.MixedIndex(index), cache
238 return rustrevlog.MixedIndex(index), cache
239
239
240
240
241 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
241 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
242 # signed integer)
242 # signed integer)
243 _maxentrysize = 0x7FFFFFFF
243 _maxentrysize = 0x7FFFFFFF
244
244
245
245
246 class revlog(object):
246 class revlog(object):
247 """
247 """
248 the underlying revision storage object
248 the underlying revision storage object
249
249
250 A revlog consists of two parts, an index and the revision data.
250 A revlog consists of two parts, an index and the revision data.
251
251
252 The index is a file with a fixed record size containing
252 The index is a file with a fixed record size containing
253 information on each revision, including its nodeid (hash), the
253 information on each revision, including its nodeid (hash), the
254 nodeids of its parents, the position and offset of its data within
254 nodeids of its parents, the position and offset of its data within
255 the data file, and the revision it's based on. Finally, each entry
255 the data file, and the revision it's based on. Finally, each entry
256 contains a linkrev entry that can serve as a pointer to external
256 contains a linkrev entry that can serve as a pointer to external
257 data.
257 data.
258
258
259 The revision data itself is a linear collection of data chunks.
259 The revision data itself is a linear collection of data chunks.
260 Each chunk represents a revision and is usually represented as a
260 Each chunk represents a revision and is usually represented as a
261 delta against the previous chunk. To bound lookup time, runs of
261 delta against the previous chunk. To bound lookup time, runs of
262 deltas are limited to about 2 times the length of the original
262 deltas are limited to about 2 times the length of the original
263 version data. This makes retrieval of a version proportional to
263 version data. This makes retrieval of a version proportional to
264 its size, or O(1) relative to the number of revisions.
264 its size, or O(1) relative to the number of revisions.
265
265
266 Both pieces of the revlog are written to in an append-only
266 Both pieces of the revlog are written to in an append-only
267 fashion, which means we never need to rewrite a file to insert or
267 fashion, which means we never need to rewrite a file to insert or
268 remove data, and can use some simple techniques to avoid the need
268 remove data, and can use some simple techniques to avoid the need
269 for locking while reading.
269 for locking while reading.
270
270
271 If checkambig, indexfile is opened with checkambig=True at
271 If checkambig, indexfile is opened with checkambig=True at
272 writing, to avoid file stat ambiguity.
272 writing, to avoid file stat ambiguity.
273
273
274 If mmaplargeindex is True, and an mmapindexthreshold is set, the
274 If mmaplargeindex is True, and an mmapindexthreshold is set, the
275 index will be mmapped rather than read if it is larger than the
275 index will be mmapped rather than read if it is larger than the
276 configured threshold.
276 configured threshold.
277
277
278 If censorable is True, the revlog can have censored revisions.
278 If censorable is True, the revlog can have censored revisions.
279
279
280 If `upperboundcomp` is not None, this is the expected maximal gain from
280 If `upperboundcomp` is not None, this is the expected maximal gain from
281 compression for the data content.
281 compression for the data content.
282
282
283 `concurrencychecker` is an optional function that receives 3 arguments: a
283 `concurrencychecker` is an optional function that receives 3 arguments: a
284 file handle, a filename, and an expected position. It should check whether
284 file handle, a filename, and an expected position. It should check whether
285 the current position in the file handle is valid, and log/warn/fail (by
285 the current position in the file handle is valid, and log/warn/fail (by
286 raising).
286 raising).
287 """
287 """
288
288
289 _flagserrorclass = error.RevlogError
289 _flagserrorclass = error.RevlogError
290
290
291 def __init__(
291 def __init__(
292 self,
292 self,
293 opener,
293 opener,
294 target,
294 target,
295 radix,
295 radix,
296 postfix=None,
296 postfix=None, # only exist for `tmpcensored` now
297 checkambig=False,
297 checkambig=False,
298 mmaplargeindex=False,
298 mmaplargeindex=False,
299 censorable=False,
299 censorable=False,
300 upperboundcomp=None,
300 upperboundcomp=None,
301 persistentnodemap=False,
301 persistentnodemap=False,
302 concurrencychecker=None,
302 concurrencychecker=None,
303 trypending=False,
303 ):
304 ):
304 """
305 """
305 create a revlog object
306 create a revlog object
306
307
307 opener is a function that abstracts the file opening operation
308 opener is a function that abstracts the file opening operation
308 and can be used to implement COW semantics or the like.
309 and can be used to implement COW semantics or the like.
309
310
310 `target`: a (KIND, ID) tuple that identify the content stored in
311 `target`: a (KIND, ID) tuple that identify the content stored in
311 this revlog. It help the rest of the code to understand what the revlog
312 this revlog. It help the rest of the code to understand what the revlog
312 is about without having to resort to heuristic and index filename
313 is about without having to resort to heuristic and index filename
        analysis.  Note that this must reliably be set by normal code, but
        that test, debug, or performance-measurement code might not set it
        to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._nodemap_file = None
        self.postfix = postfix
+       self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing,
        # to avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make a copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            new_header = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
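        # (x & (x - 1)) clears the lowest set bit, so the expression above is
        # zero exactly when the size is a power of two; together with the
        # <= 0 check this accepts only sizes like 4096, 8192, 65536, ...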
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file's content, with or without mmap

        If the file is missing, return the empty string."""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''

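As a rough standalone sketch of the threshold logic above, using plain files
and the stdlib instead of Mercurial's opener/util helpers (the function name
and plain-file API are illustrative only, not part of the revlog API):

    import mmap
    import os

    def read_maybe_mmap(path, mmap_threshold=None, size=None):
        """Return file content, memory-mapping it when it is large enough."""
        try:
            with open(path, 'rb') as fp:
                if mmap_threshold is not None:
                    file_size = os.fstat(fp.fileno()).st_size
                    # mmap of an empty file is an error, hence the > 0 guard
                    if file_size >= mmap_threshold and file_size > 0:
                        if size is not None:
                            size = min(file_size, size)  # avoid mapping past EOF
                        length = file_size if size is None else size
                        return mmap.mmap(
                            fp.fileno(), length, access=mmap.ACCESS_READ
                        )
                return fp.read() if size is None else fp.read(size)
        except FileNotFoundError:
            return b''
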
    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

-       if self.postfix is None:
+       if self.postfix is not None:
+           entry_point = b'%s.i.%s' % (self.radix, self.postfix)
+       elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
+           entry_point = b'%s.i.a' % self.radix
+       else:
            entry_point = b'%s.i' % self.radix
-       else:
-           entry_point = b'%s.i.%s' % (self.radix, self.postfix)

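        # With this change the changelog no longer needs to handle the
        # pending file name itself: when _trypending is set, a pending
        # '00changelog.i.a' written by an open transaction is picked up here
        # transparently.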
        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
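        # For example, a version 1 inline revlog stores the header
        # 0x00010001: the low 16 bits give format version 1, and the high
        # bits carry the feature flags (FLAG_INLINE_DATA is 1 << 16).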

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(self, entry_data)
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'not enough index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta is implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for the persistent nodemap will be dealt with within
            # the main docket, so disable it for now.
            self._nodemap_file = None

-       if self.postfix is None or self.postfix == b'a':
+       if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

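The entry-point selection at the top of _loadindex() can be summarized by the
following standalone sketch (the function name and the plain os.path API are
illustrative only; the real code goes through the vfs opener):

    import os.path

    def pick_entry_point(radix, postfix=None, trypending=False):
        """Return the index file name a revlog should open."""
        if postfix is not None:
            return '%s.i.%s' % (radix, postfix)  # an explicit postfix wins
        if trypending and os.path.exists('%s.i.a' % radix):
            return '%s.i.a' % radix  # pending data from an open transaction
        return '%s.i' % radix  # the normal index

    # e.g. pick_entry_point('00changelog', trypending=True) returns
    # '00changelog.i.a' while uncommitted transaction data is pending.
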
    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public-facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user-facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead.
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline
        # revlog.
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use the explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents), making the delta incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is responsible for validating the docket, so we
        # end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # The first tuple entry is 8 bytes: the first 6 bytes are the offset,
    # the last 2 bytes are the flags.
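    # For example, an entry starting at byte offset 8192 with flags 0x0001
    # stores (8192 << 16) | 0x0001 == 0x20000001 in index[rev][0]; start()
    # recovers 8192 and flags() recovers 0x0001.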
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

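    # The swap above normalizes the result so that a null p1 with a real p2
    # is reported as (p2, nullrev): callers can rely on the first parent
    # being non-null whenever the revision has any parent at all.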
    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # note: d[5] and d[6] are revision numbers, so the comparison must be
        # against nullrev (comparing against self.nullid, a node, could never
        # match)
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add the text length of the base since decompressing that also
            # takes work.  For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try the C implementation first.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

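    # Worked example: with generaldelta and delta bases 5 -> 2 -> 0 (rev 0 is
    # a full snapshot, each later rev deltas against its base),
    # _deltachain(5) returns ([0, 2, 5], False), while
    # _deltachain(5, stoprev=2) returns ([5], True).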
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common.  In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

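    # Worked example: for a linear history 0-1-2-3 with common=[node(1)] and
    # heads=[node(3)], 'has' lazily covers {nullrev, 0, 1} and the missing
    # list is [node(2), node(3)].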
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common.  This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers.  If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses the list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return the empty list.
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # are marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

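For intuition, here is a minimal standalone sketch of the same marking scheme over a toy parent table (pure Python, independent of the index layout; `nullrev` is -1, as in Mercurial). Note that the real `_headrevs` sizes the array as `count + 1` so that a `nullrev` parent writes into the spare slot at index -1 instead of needing explicit checks:

    nullrev = -1

    def toy_headrevs(parentrevs):
        # parentrevs[r] = (p1, p2) for each revision r, in topological order
        ishead = [1] * len(parentrevs)  # every rev is a head until a child shows up
        for r, (p1, p2) in enumerate(parentrevs):
            # a revision with a child cannot be a head
            if p1 != nullrev:
                ishead[p1] = 0
            if p2 != nullrev:
                ishead[p2] = 0
        return [r for r, flag in enumerate(ishead) if flag]

    # a small history with one branch: 0 <- 1 <- 2 and 0 <- 3
    assert toy_headrevs(
        [(nullrev, nullrev), (0, nullrev), (1, nullrev), (0, nullrev)]
    ) == [2, 3]
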
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

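A hypothetical standalone sketch of the same resolution precedence, using a plain list of 20-byte nodes as a stand-in for the index (the names here are illustrative, not the revlog API):

    def toy_match(store, id):
        # 1. integer -> revision number
        if isinstance(id, int):
            return store[id]
        # 2. 20-byte string -> already a binary node
        if len(id) == 20 and id in store:
            return id
        # 3. decimal byte string -> revision number (negative counts from tip)
        try:
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError  # reject b"03", b"+3", ...
            if rev < 0:
                rev = len(store) + rev
            if rev < 0 or rev >= len(store):
                raise ValueError
            return store[rev]
        except ValueError:
            pass
        # 4. 40-byte string -> full hex nodeid
        if len(id) == 40:
            try:
                node = bytes.fromhex(id.decode('ascii'))
            except ValueError:
                return None
            if node in store:
                return node
        return None

    nodes = [bytes([r]) * 20 for r in range(3)]
    assert toy_match(nodes, 1) == nodes[1]
    assert toy_match(nodes, b'-1') == nodes[2]
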
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be
        # full hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

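A hypothetical usage sketch, assuming `rl` is an open revlog and `node` one of its entries: the returned prefix is the shortest hex prefix that round-trips through `lookup()` without ambiguity.

    prefix = rl.shortest(node, minlength=4)
    assert len(prefix) >= 4
    assert rl.lookup(prefix) == node
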
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

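The policy is append-or-reset: contiguous reads are glued onto the cached window, while any disjoint read restarts it. A minimal sketch of the same logic (`maxsize` stands in for the module-level `_chunksize`):

    def toy_cachesegment(cache, offset, data, maxsize=65536):
        o, d = cache
        if o + len(d) == offset and len(d) + len(data) < maxsize:
            return o, d + data  # extends the cached window
        return offset, data     # disjoint read: restart the window

    cache = (0, b'')
    cache = toy_cachesegment(cache, 0, b'abcd')   # -> (0, b'abcd')
    cache = toy_cachesegment(cache, 4, b'efgh')   # contiguous -> (0, b'abcdefgh')
    cache = toy_cachesegment(cache, 100, b'ij')   # gap -> (100, b'ij')
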
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

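The window arithmetic rounds the requested range outward to `_chunkcachesize` boundaries; the bit masking only works because the cache size is a power of two. A quick standalone check of the math:

    def aligned_window(offset, length, cachesize=65536):
        # round the start down to a block boundary, and round the end up
        # past one extra block, exactly as _readsegment() does
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        return realoffset, reallength

    # a 10-byte read straddling the first 64KiB boundary pulls in both blocks
    assert aligned_window(65530, 10) == (0, 131072)
    # a read well inside one block still fetches at least that whole block
    assert aligned_window(70000, 10) == (65536, 65536)
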
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

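In an inline revlog the data chunks are interleaved with the fixed-size index entries, so revision `r`'s data sits after the index entries of revisions 0 through `r`, hence the `(rev + 1) * entry_size` shift above. A worked example, assuming the 64-byte v1 entry size:

    def toy_inline_offset(logical_start, rev, entry_size=64):
        # data for rev r follows the index entries of revs 0..r
        return logical_start + (rev + 1) * entry_size

    # rev 0's data starts 64 bytes into the file, even though its
    # logical offset is 0
    assert toy_inline_offset(0, 0) == 64
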
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

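The recursion bottoms out at a full text (delta base is the revision itself or nullrev); a delta against a parent is an ordinary delta, while a delta against anything else is a snapshot exactly when its base is. A standalone sketch of the pure-Python fallback above (toy data structures, not the real index):

    def toy_issnapshot(deltabase, parents, rev):
        # deltabase[r] is the delta parent (r itself means full text);
        # parents[r] is (p1, p2); nullrev is -1
        if rev == -1:
            return True
        base = deltabase[rev]
        if base == rev or base == -1:
            return True                  # stored as full text: a snapshot
        if base in parents[rev]:
            return False                 # a regular delta against a parent
        return toy_issnapshot(deltabase, parents, base)

    parents = [(-1, -1), (0, -1), (1, -1)]
    # rev 2 deltas against its parent rev 1 -> not a snapshot
    assert toy_issnapshot([0, -1, 1], parents, 2) is False
    # rev 2 deltas against non-parent rev 0 (a snapshot) -> intermediate snapshot
    assert toy_issnapshot([0, -1, 0], parents, 2) is True
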
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and the raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

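Conceptually, the text is rebuilt by walking the delta chain: the first chunk is a full text and every later chunk is a delta applied on top. A minimal sketch of that reconstruction, with a toy `(start, end, data)` edit format standing in for the bdiff binary format that `mdiff.patches()` actually consumes:

    def toy_patch(base, deltas):
        # apply (start, end, data) edits in order; a stand-in for
        # mdiff.patches(), which works on packed bdiff hunks
        text = base
        for start, end, data in deltas:
            text = text[:start] + data + text[end:]
        return text

    # a chain of one full text plus two deltas, rebuilt front to back
    chain = [b'abcdef', [(0, 1, b'X')], [(5, 6, b'Y')]]
    text = chain[0]
    for delta in chain[1:]:
        text = toy_patch(text, delta)
    assert text == b'XbcdeY'
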
    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        segment = self._getsegment(sidedata_offset, sidedata_size)
        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

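Sidedata offsets get the same inline correction as data chunks: the logical offset recorded in the index is shifted past the `1 + rev` index entries that precede it on disk. With hypothetical numbers and the 64-byte v1 entry size:

    sidedata_offset, rev, entry_size = 500, 3, 64
    # physical location of the blob in an inline revlog
    assert sidedata_offset + entry_size * (1 + rev) == 756
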
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    dfh.seek(0, os.SEEK_END)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

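This is where the guard added by this change lands: a revlog opened with `trypending` set (one that reads pending `.a` files during a transaction) is read-only, and any attempt to enter the write context now fails loudly instead of corrupting state. A hypothetical caller sketch (`rl` a writable revlog, `tr` an open transaction; nesting is safe because the inner `_writing()` reuses the already-open handles):

    with rl._writing(tr):
        # appends via addrevision()/addrawrevision() reuse these handles
        rl.addrevision(text, tr, linkrev, p1, p2)

    # whereas on a revlog opened with trypending=True, entering the
    # context raises:
    #   ProgrammingError: try to write in a `trypending` revlog: ...
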
    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement its transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such a
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

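    # A minimal usage sketch for ``addrevision`` (illustrative only: ``rl``
    # and ``tr`` stand for an already-open revlog and an active transaction,
    # both assumptions of this example rather than names defined here):
    #
    #   node = rl.addrevision(b'revision text', tr, linkrev, p1, p2)
    #   assert rl.index.get_rev(node) == len(rl) - 1
    #
    # If a revision with the computed node already exists, the existing
    # revision number is returned and nothing new is written.
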
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for the "decompress compressed
        # data when encoded with common and officially supported compression
        # engines" case over "raw data" and "data encoded by less common or
        # non-official compression engines." That is why we have the inline
        # lookup first followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )

        return compressor.decompress(data)

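    # A hedged sketch of the round trip implied by ``compress`` and
    # ``decompress`` (``rl`` is an assumed open revlog, not a name defined
    # here):
    #
    #   header, packed = rl.compress(rawchunk)
    #   assert rl.decompress(header + packed) == rawchunk  # modulo buffer type
    #
    # The first stored byte routes decompression: b'x' means zlib, b'\0'
    # means the chunk is stored verbatim, and b'u' means uncompressed data
    # behind a one-byte header; any other byte is looked up in compengines.
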
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add
                # on the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need the rawtext size before it is changed by flag processors,
            # which is the non-raw size. use revlog explicitly to avoid
            # filelog's extra logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if sidedata and self.hassidedata:
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no
            # different than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

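    # For reference, the index entry tuple ``e`` appended above maps
    # positionally to:
    #
    #   e[0] offset and flags    e[1] compressed (delta) length
    #   e[2] rawtext length      e[3] delta base rev
    #   e[4] linkrev             e[5], e[6] parent revs
    #   e[7] node                e[8], e[9] sidedata offset and length
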
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.
        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._format_version != REVLOGV2:
            return self.end(prev)

        offset = 0
        for rev, entry in enumerate(self.index):
            sidedata_end = entry[8] + entry[9]
            # Sidedata for a previous rev has potentially been written after
            # this rev's end, so take the max.
            offset = max(self.end(rev), offset, sidedata_end)
        return offset

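    # The v2 scan above is, for a non-empty revlog, equivalent to the
    # following sketch (entry[8] + entry[9] being the sidedata end, per the
    # entry layout noted after ``_addrevision``):
    #
    #   max(max(self.end(r), self.index[r][8] + self.index[r][9])
    #       for r in self)
    #
    # which is O(n), hence the TODO above about caching it in the docket.
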
    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            if sidedata:
                ifh.write(sidedata)
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

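    # On-disk layout produced above, as a sketch:
    #
    #   non-inline: the .i file gets the fixed-size index entry, while the
    #               .d file gets delta header, delta data, then sidedata.
    #   inline:     the .i file interleaves entries with their data
    #               (entry, data, sidedata, entry, ...), and
    #               _enforceinlinesize() migrates to the split .i/.d layout
    #               once the file grows past the inline threshold.
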
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to
                    # retrieve the added revision, which will require a call
                    # to revision(). revision() will fast path if there is a
                    # cache hit. So, we tell _addrevision() to always cache in
                    # this case. We're only using addgroup() in the context of
                    # changegroup generation so the revision data can always
                    # be handled as raw by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

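    # Each element of ``deltas`` unpacked above is an 8-tuple:
    #
    #   (node, p1, p2, linknode, deltabase, delta, flags, sidedata)
    #
    # A hedged caller sketch (``rl``, ``tr``, ``deltas`` and a changelog
    # ``cl`` are all assumptions of the example):
    #
    #   added = rl.addgroup(deltas, lambda linknode: cl.rev(linknode), tr)
    #
    # The return value is False only when no deltas were received at all;
    # duplicates still count as "not empty".
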
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, end)
            end = rev * self.index.entry_size
        else:
            end += rev * self.index.entry_size

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment.
            self._docket.index_end = end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

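    # Hedged sketch tying ``getstrippoint`` and ``strip`` together (``rl``
    # and ``tr`` assumed):
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   # ``broken`` holds revs whose linkrevs this strip will invalidate;
    #   # the caller is expected to save and re-add them afterwards.
    #   rl.strip(minlink, tr)
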
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

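    # Interpreting the result, per the docstring above (``rl`` assumed):
    #
    #   dd, di = rl.checksize()
    #   # (0, 0)  -> healthy revlog
    #   # dd != 0 -> the data file is dd bytes longer than the index implies
    #   # di != 0 -> the index file carries di trailing partial-entry bytes
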
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

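    # ``nodesorder`` accepts b'nodes', b'storage', b'linear' or None, per the
    # check above. A hedged call sketch (``rl`` and ``nodes`` assumed):
    #
    #   for delta in rl.emitrevisions(nodes, revisiondata=True):
    #       ...  # each item is a ``revlogrevisiondelta``
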
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If None, the destination revlog's current setting
        is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

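    # Hedged sketch of a delta-recomputing copy (``src``, ``dest`` and ``tr``
    # are assumptions: two open revlogs and a transaction):
    #
    #   src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
    #
    # With DELTAREUSENEVER every delta is recomputed under the destination's
    # own policy, per the table in the docstring above.
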
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            # 1. length check: L1 == L2, in all cases.
            # 2. hash check: depending on flag processor, we may need to
            #    use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

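    # Editor's usage sketch (hypothetical caller, not part of this change):
    #
    #     info = rl.storageinfo(revisionscount=True, trackedsize=True)
    #     ui.write(b'%d revisions, %d tracked bytes\n'
    #              % (info[b'revisionscount'], info[b'trackedsize']))
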
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            dfh.seek(0, os.SEEK_END)
            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry = (new_offset_flags,) + entry[1:8]
                entry += (current_offset, len(serialized_sidedata))

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry)
                current_offset += len(serialized_sidedata)

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, e[8], e[9], e[0])
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
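    # Worked example (editor's note, not from this change) of the flag update
    # above: with entry[0] == 0b0100, flags-to-add flags[0] == 0b0011 and
    # flags-to-remove flags[1] == 0b0001, the expression
    # `entry[0] | flags[0] & ~flags[1]` parses as
    # `entry[0] | (flags[0] & ~flags[1])` since `&` binds tighter than `|`,
    # yielding 0b0100 | 0b0010 == 0b0110.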
@@ -1,656 +1,656
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from ..node import hex

from .. import (
    error,
    util,
)

class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog._nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog._nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data

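# Editor's summary of the on-disk docket layout read above (derived from the
# struct formats; not part of the original change):
#
#   S_VERSION (">B", 1 byte):       format version, must be ONDISK_VERSION
#   S_HEADER  (">BQQQQ", 33 bytes): uid_size, tip_rev, data_length,
#                                   data_unused, tip_node_size
#   uid       (uid_size bytes)
#   tip_node  (tip_node_size bytes)
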
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # this needs to run after the changelog finalization (which uses "cl-"
    # prefixed callback ids); callbacks run in sorted id order, and "nm-"
    # sorts after "cl-"
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))

class _NoTransaction(object):
    """transaction like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass

def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)

def delete_nodemap(tr, repo, revlog):
    """Delete nodemap data on disk for a given revlog"""
    if revlog._nodemap_file is None:
        msg = "calling delete nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)
    repo.svfs.unlink(revlog._nodemap_file)

def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog._nodemap_file is None:
        if force:
            revlog._nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):  # unused data is 10% or more
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of the transaction abort do not clean up new files,
        # they simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog._nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old index file in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
        tr.addpostclose(callback_id, cleanup)

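# Editor's usage sketch (hypothetical, not from this change): forcing a
# nodemap write outside of a real transaction, mirroring what
# update_persistent_nodemap() does above:
#
#     notr = _NoTransaction()
#     persist_nodemap(notr, revlog, force=True)
#     for k in sorted(notr._postclose):
#         notr._postclose[k](None)
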
### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return hex(os.urandom(ID_SIZE))

class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        #   - When new data are appended, it is preserved.
        #   - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision. If it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operations
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        #   - data files shorter than this are corrupted,
        #   - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)

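# Editor's sketch (not from this change): serialize() output round-trips
# through the structs used by persisted_data() above; a minimal consistency
# check on a hand-filled docket:
#
#     d = NodeMapDocket()
#     d.tip_rev = 0
#     d.tip_node = b'\x00' * 20
#     d.data_length = 64
#     raw = d.serialize()
#     (version,) = S_VERSION.unpack(raw[: S_VERSION.size])
#     assert version == ONDISK_VERSION
#     assert len(raw) == S_VERSION.size + S_HEADER.size + len(d.uid) + 20
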
def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    prefix = revlog.radix
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.radix
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others

### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a Trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using its node's shortest unique
# prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integers, big endian; see S_BLOCK below). Each entry can be
# one of the following:
#
#  * value >= 0 -> index of sub-block
#  * value == -1 -> no value
#  * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.

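# Worked example of the entry encoding above (editor's addition): inside a
# block, a stored value of 3 points at sub-block number 3, NO_ENTRY (-1)
# marks an empty slot, and a value of -5 encodes revision -(-5 + 2) == 3,
# matching _transform_rev() below.
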
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2

def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    applying it twice gives back the original value).
    """
    return -(rev + REV_OFFSET)

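# Editor's doctest-style check of the involution property claimed above:
#
#     >>> _transform_rev(0)
#     -2
#     >>> _transform_rev(_transform_rev(0))
#     0
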
def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)

class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))

def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root

def _update_trie(index, root, last_rev):
    """update a trie in place with the revisions added since last_rev"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root

def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed

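# Editor's sketch of the collision case above: if rev 0 has a node starting
# with b"ab" and rev 1 has a node starting with b"ac", both share the first
# digit 0xa, so the root's slot 10 becomes a sub-block whose slots 0xb and
# 0xc hold revisions 0 and 1 respectively.
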
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)

def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block

def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)

def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)

def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size

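# Editor's round-trip sketch (hypothetical, using only helpers from this
# module): persisting a hand-built trie and parsing it back.
#
#     root = Block()
#     root[0] = 0  # revision 0, for a node whose first hex digit is 0x0
#     data = _persist_trie(root)
#     parsed, max_idx = parse_data(data)
#     assert parsed[0] == 0 and max_idx == 0
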
# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
        nm_rev = _find_node(root, hex(index[r][7]))
        if nm_rev is None:
            msg = b" revision node does not match any entries: %d\n" % r
            ui.write_err(msg)
            ret = 1
        elif nm_rev != r:
            msg = (
                b" revision node does not match the expected revision: "
                b"%d != %d\n" % (r, nm_rev)
            )
            ui.write_err(msg)
            ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret

def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v

def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry

def get_nodemap_file(revlog):
-    if revlog.postfix == b'a':
+    if revlog._trypending:
        pending_path = revlog.radix + b".n.a"
        if revlog.opener.exists(pending_path):
            return pending_path
    return revlog.radix + b".n"
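# Editor's sketch (hypothetical radix, not from this change): for a revlog
# with revlog.radix == b"00changelog" and revlog._trypending set, the
# function above returns b"00changelog.n.a" when that pending docket exists
# and falls back to b"00changelog.n" otherwise.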