revlog: move tiprev() from changelog up to revlog...
Martin von Zweigbergk
r43745:ec7ba79b default
@@ -1,729 +1,726 @@ mercurial/changelog.py
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    copies,
    encoding,
    error,
    pycompat,
    revlog,
    util,
)
from .utils import (
    dateutil,
    stringutil,
)

from .revlogutils import sidedata as sidedatamod

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)
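
# Editor's sketch (not part of the original file): the escape scheme above
# round-trips arbitrary bytes, including backslashes and NULs. The helper
# name is hypothetical.
def _escape_roundtrip_demo():
    """
    >>> _escape_roundtrip_demo()
    True
    """
    sample = b'a\\b\nc\0d'
    return _string_unescape(_string_escape(sample)) == sample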


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [
        _string_escape(b'%s:%s' % (k, pycompat.bytestr(d[k])))
        for k in sorted(d)
    ]
    return b"\0".join(items)
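
# Editor's sketch (not part of the original file): sorted keys make the
# encoding independent of dict ordering, keeping changeset hashes stable.
# The helper name is hypothetical.
def _encodeextra_deterministic_demo():
    """
    >>> _encodeextra_deterministic_demo()
    True
    """
    one = encodeextra({b'k1': b'v1', b'k2': b'v2'})
    two = encodeextra({b'k2': b'v2', b'k1': b'v1'})
    return one == two == b'k1:v1\x00k2:v2'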


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
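
# Editor's sketch (not part of the original file): an appender behaves like
# a file whose tail lives in memory until the transaction flushes it.
# ``vfs`` is any Mercurial vfs/opener; name and mode here are illustrative.
def _appender_demo(vfs):
    buf = []
    f = appender(vfs, b'00changelog.i', b'a+b', buf)
    start = f.end()                  # bytes already on disk
    f.write(b'pending index entry')  # buffered in ``buf``, not yet on disk
    assert f.end() == start + 19     # the virtual size includes the buffer
    f.seek(start)
    tail = f.read(19)                # reads may span real file and buffer
    f.close()
    return tail == b'pending index entry'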


def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""

    def _divert(name, mode=b'r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + b".a", mode)

    return _divert


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)

    return _delay
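
# Editor's sketch (not part of the original file): both wrappers are
# transparent for every file except ``target``; only the changelog index
# gets redirected. Hypothetical usage with any Mercurial opener/vfs:
def _delayopener_demo(realopener):
    buf = []
    opener = _delayopener(realopener, b'00changelog.i', buf)
    manifest = opener(b'00manifest.i')     # untouched, opened normally
    cl = opener(b'00changelog.i', b'a+b')  # an appender writing to ``buf``
    return manifest, cl, buf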


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        r'_offsets',
        r'_text',
        r'_sidedata',
        r'_cpsd',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd

        return self
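
    # Editor's illustration (not part of the original file): a minimal entry
    # in the format parsed above, with the offsets recorded by __new__:
    #
    #     0123...abcdef (40 hex chars)\n  <- manifest node      (nl1)
    #     Jane Doe <jane@example.com>\n   <- user               (nl2)
    #     1575000000 0 branch:stable\n    <- time tz extra      (nl3)
    #     dir/file.txt\n                  <- files, one per line
    #     \n                              <- blank line         (doublenl)
    #     commit message (free text)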

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESADDED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesadded')
        if rawindices is None:
            return None
        return copies.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            rawindices = self._sidedata.get(sidedatamod.SD_FILESREMOVED)
            if not rawindices:
                return []
        else:
            rawindices = self.extra.get(b'filesremoved')
        if rawindices is None:
            return None
        return copies.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P1COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p1copies')
        if rawcopies is None:
            return None
        return copies.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            rawcopies = self._sidedata.get(sidedatamod.SD_P2COPIES)
            if not rawcopies:
                return {}
        else:
            rawcopies = self.extra.get(b'p2copies')
        if rawcopies is None:
            return None
        return copies.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False
        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()
        self._copiesstorage = opener.options.get(b'copies-storage')

    def tiprev(self):
+       """filtered version of revlog.tiprev"""
        for i in pycompat.xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i

-   def tip(self):
-       """filtered version of revlog.tip"""
-       return self.node(self.tiprev())
-
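    # Editor's sketch (not part of the original change): tip() can be removed
    # here because the inherited revlog.tip() dispatches through tiprev(),
    # which the override above keeps filtering-aware:
    #
    #     class revlog(object):
    #         def tip(self):
    #             return self.node(self.tiprev())  # subclass tiprev() wins
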
    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return 0 <= rev < len(self) and rev not in self.filteredrevs

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in pycompat.xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    def _checknofilteredinrevs(self, revs):
        """raise the appropriate error if 'revs' contains a filtered revision

        This returns a version of 'revs' to be used thereafter by the caller.
        In particular, if revs is an iterator, it is converted into a set.
        """
        safehasattr = util.safehasattr
        if safehasattr(revs, '__next__'):
            # Note that inspect.isgenerator() is not true for iterators.
            revs = set(revs)

        filteredrevs = self.filteredrevs
        if safehasattr(revs, 'first'):  # smartset
            offenders = revs & filteredrevs
        else:
            offenders = filteredrevs.intersection(revs)

        for rev in offenders:
            raise error.FilteredIndexError(rev)
        return revs

    def headrevs(self, revs=None):
        if revs is None and self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        if self.filteredrevs:
            revs = self._checknofilteredinrevs(revs)
        return super(changelog, self).headrevs(revs)

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(
                hex(node), self.indexfile, _(b'filtered node')
            )
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        b"delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        b"finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        b"create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False
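
    # Editor's sketch (not part of the original file): the lifecycle of the
    # delayed-write machinery over a single transaction:
    #
    #     cl.delayupdate(tr)   # redirect writes to a '.a' file or a buffer
    #     ... new revisions appended ...
    #     (tr pending)  -> _writepending(tr)  # expose 00changelog.i.a to hooks
    #     (tr finalize) -> _finalize(tr)      # rename/append into 00changelog.i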

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
        p1copies=None,
        p2copies=None,
        filesadded=None,
        filesremoved=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files)
        sidedata = None
        if extra is not None:
            for name in (
                b'p1copies',
                b'p2copies',
                b'filesadded',
                b'filesremoved',
            ):
                extra.pop(name, None)
        if p1copies is not None:
            p1copies = copies.encodecopies(sortedfiles, p1copies)
        if p2copies is not None:
            p2copies = copies.encodecopies(sortedfiles, p2copies)
        if filesadded is not None:
            filesadded = copies.encodefileindices(sortedfiles, filesadded)
        if filesremoved is not None:
            filesremoved = copies.encodefileindices(sortedfiles, filesremoved)
        if self._copiesstorage == b'extra':
            extrasentries = p1copies, p2copies, filesadded, filesremoved
            if extra is None and any(x is not None for x in extrasentries):
                extra = {}
            if p1copies is not None:
                extra[b'p1copies'] = p1copies
            if p2copies is not None:
                extra[b'p2copies'] = p2copies
            if filesadded is not None:
                extra[b'filesadded'] = filesadded
            if filesremoved is not None:
                extra[b'filesremoved'] = filesremoved
        elif self._copiesstorage == b'changeset-sidedata':
            sidedata = {}
            if p1copies:
                sidedata[sidedatamod.SD_P1COPIES] = p1copies
            if p2copies:
                sidedata[sidedatamod.SD_P2COPIES] = p2copies
            if filesadded:
                sidedata[sidedatamod.SD_FILESADDED] = filesadded
            if filesremoved:
                sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
            if not sidedata:
                sidedata = None

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
            text, transaction, len(self), p1, p2, sidedata=sidedata
        )
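
    # Editor's note (not part of the original file): the branches above route
    # copy metadata by the 'copies-storage' opener option:
    #
    #     b'extra'              -> encoded into the changeset's extra dict
    #     b'changeset-sidedata' -> stored as revlog sidedata (SD_* keys)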

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
@@ -1,2958 +1,2961 @@ mercurial/revlog.py
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVIDX_SIDEDATA,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    flagutil,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_SIDEDATA
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
rustancestor = policy.importrust(r'ancestor')
rustdagop = policy.importrust(r'dagop')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False, {}


def ellipsiswriteprocessor(rl, text, sidedata):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def getoffset(q):
    return int(q >> 16)


def gettype(q):
    return int(q & 0xFFFF)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
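
# Editor's sketch (not part of the original file): offset and flags share a
# single integer; the offset lives in the high bits and the flags in the
# low 16 bits, so the pair round-trips through getoffset()/gettype():
#
#     packed = offset_type(4096, 0)
#     assert getoffset(packed) == 4096
#     assert gettype(packed) == 0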


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(b">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack


class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)


class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off : off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (
                offset_type(e[0], 0),
                e[1],
                -1,
                e[2],
                e[3],
                nodemap.get(e[4], nullrev),
                nodemap.get(e[5], nullrev),
                e[6],
            )
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(
                _(b'index entry flags need revlog version 1')
            )
        e2 = (
            getoffset(entry[0]),
            entry[1],
            entry[3],
            entry[4],
            node(entry[5]),
            node(entry[6]),
            entry[7],
        )
        return indexformatv0_pack(*e2)


# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(b">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(b">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack
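
# Editor's check (not part of the original file): both index layouts have a
# fixed record size that struct.Struct confirms:
#
#     assert indexformatv0.size == 4 * 4 + 3 * 20  # 76 bytes per v0 entry
#     assert indexformatng.size == 8 + 6 * 4 + 32  # 64 bytes per v1 entry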
276
276
277 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
277 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
278 # signed integer)
278 # signed integer)
279 _maxentrysize = 0x7FFFFFFF
279 _maxentrysize = 0x7FFFFFFF
280
280
281
281
282 class revlogio(object):
282 class revlogio(object):
283 def __init__(self):
283 def __init__(self):
284 self.size = indexformatng.size
284 self.size = indexformatng.size
285
285
286 def parseindex(self, data, inline):
286 def parseindex(self, data, inline):
287 # call the C implementation to parse the index data
287 # call the C implementation to parse the index data
288 index, cache = parsers.parse_index2(data, inline)
288 index, cache = parsers.parse_index2(data, inline)
289 return index, getattr(index, 'nodemap', None), cache
289 return index, getattr(index, 'nodemap', None), cache
290
290
291 def packentry(self, entry, node, version, rev):
291 def packentry(self, entry, node, version, rev):
292 p = indexformatng_pack(*entry)
292 p = indexformatng_pack(*entry)
293 if rev == 0:
293 if rev == 0:
294 p = versionformat_pack(version) + p[4:]
294 p = versionformat_pack(version) + p[4:]
295 return p
295 return p


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        indexfile,
        datafile=None,
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        """
        self.upperboundcomp = upperboundcomp
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + b".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 2-tuple of file handles being used for active writing.
        self._writinghandles = None

        self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif b'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self.hassidedata = bool(opts.get(b'side-data', False))
        if self.hassidedata:
            self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
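        # Sanity sketch (hypothetical values): a positive integer n is a
        # power of two exactly when n & (n - 1) == 0, e.g. 65536 & 65535
        # is 0 while 65537 & 65536 is not, which is what the check above
        # relies on.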

        indexdata = b''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (
                    mmapindexthreshold is not None
                    and self.opener.fstat(f).st_size >= mmapindexthreshold
                ):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF
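        # Illustration (hypothetical header): with versionflags equal to
        # REVLOGV1 | FLAG_INLINE_DATA == 0x00010001, flags comes out as
        # 0x00010000 and fmt as REVLOGV1 (1).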

        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(
                    _(b'unknown flags (%#04x) in version %d revlog %s')
                    % (flags >> 16, fmt, self.indexfile)
                )

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(
                _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
            )
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.indexfile
            )
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode=b'r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != b'r':
            args[r'checkambig'] = self._checkambig
        if mode == b'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())
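    # Note: on an empty revlog tiprev() is -1 (nullrev), and node(-1)
    # should resolve to the null entry's nodeid, so tip() degrades to
    # nullid rather than raising.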

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7]  # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the
            # same object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _(b'no node'))
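    # The pure-Python fallback above scans the index backwards from
    # _nodepos, memoizing every node it walks past; _nodepos is then left
    # just below the hit so a later cache miss resumes the scan instead
    # of restarting from the tip.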

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
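    # For example (hypothetical entry): if index[rev][0] were
    # (1024 << 16) | 2, start(rev) would return 1024 and flags(rev)
    # would return 2.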

    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))
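    # Note: REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS clears the ellipsis bit
    # from the known-flags mask, so a revision whose only flag is
    # ELLIPSIS still takes the rawsize() fast path above.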

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7]  # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
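    # Sketch of the contract (hypothetical revlog, generaldelta off):
    # with a full snapshot at rev 3 and deltas at revs 4 and 5,
    # _deltachain(5) returns ([3, 4, 5], False), while
    # _deltachain(5, stoprev=4) returns ([5], True) since the walk stops
    # before appending the stop revision.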

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, b'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]
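    # The extra slot in ishead absorbs nullrev parents: ishead[-1]
    # aliases ishead[count], which no revision ever marks as a head, so
    # the final comprehension never reports it.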
1178
1181
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

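    # Usage sketch (hypothetical nodes `a` and `b`):
    #
    #   rl.heads()                   # every node without children
    #   rl.heads(start=a)            # only heads descending from a
    #   rl.heads(start=a, stop=[b])  # treat b as if it had no children
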
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

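    # Behaviour sketch (illustrative chain 0 <- 1 <- 2, not a doctest):
    # rl.isancestorrev(0, 2) is True via reachableroots(0, [2], [0]), while
    # rl.isancestorrev(2, 0) is False immediately because an ancestor always
    # has a smaller revision number than its descendant (2 > 0).
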
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::<roots> and <roots>::<heads>))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

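    # Tie-break sketch (illustrative): if two revisions are equally "best"
    # common ancestors of a and b, `min(map(self.node, ancs))` picks the
    # smallest node id, so repeated calls always return the same winner.
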
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _(b'no match found'))

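    # Accepted identifier forms, as a sketch (assuming `rl` is a revlog):
    #
    #   rl.lookup(5)           # integer revision number
    #   rl.lookup(b'-1')       # str(rev), negative values count from the tip
    #   rl.lookup(b'1a2b3c')   # full or unambiguous partial hex nodeid
    #
    # Anything else raises error.LookupError with b'no match found'.
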
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.indexfile, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _(b'no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

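    # Usage sketch (hypothetical `node` value): the result is the smallest
    # hex prefix that lookup would resolve unambiguously, never shorter than
    # `minlength` and never a pure 'f...' prefix of the wdir pseudo-id.
    #
    #   prefix = rl.shortest(node, minlength=4)
    #   assert rl.lookup(prefix) == node
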
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self.indexfile if self._inline else self.datafile,
                        length,
                        realoffset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self.indexfile if self._inline else self.datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

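    # Worked example of the window arithmetic above (illustrative numbers):
    # with cachesize = 65536, offset = 70000 and length = 100,
    # realoffset = 70000 & ~65535 = 65536 and
    # reallength = ((70100 + 65536) & ~65535) - 65536 = 65536,
    # so one aligned 64KiB block is read and cached while only the requested
    # 100-byte slice is returned to the caller.
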
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

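    # Worked example of the inline adjustment above (illustrative): in an
    # inline revlog the index entries and data chunks are interleaved in the
    # .i file, so the data of rev r sits (r + 1) * self._io.size bytes past
    # its logical data offset; with 64-byte index entries, rev 2's chunk is
    # shifted by 3 * 64 = 192 bytes.
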
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

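    # Behaviour sketch (illustrative): with generaldelta the stored base is
    # the actual delta parent; without it, deltas always apply against the
    # previous revision, so rev 7 deltas against rev 6 unless it is stored
    # as a full snapshot (base == rev, reported as nullrev).
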
    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

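    # Recursion sketch for the sparse-revlog case above (illustrative): a
    # revision whose delta base is one of its parents is a plain delta, not
    # a snapshot; otherwise it is an intermediate snapshot only if its base
    # is itself a snapshot, hence the walk down the delta-base chain.
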
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == nullid:
            return b"", {}

        # The text as stored inside the revlog. Might be the revision or might
        # need to be processed to retrieve the revision.
        rawtext = None

        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and the raw text is
            # cached, we can exit early.
            return rawtext, {}
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, {}

        sidedata = {}
        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            try:
                r = flagutil.processflagsread(self, rawtext, flags)
            except error.SidedataHashError as exc:
                msg = _(b"integrity check failed on %s:%s sidedata key %d")
                msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
                raise error.RevlogError(msg)
            text, validatehash, sidedata = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (
            not self._inline
            or (self.start(tiprev) + self.length(tiprev)) < _maxinline
        ):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self.indexfile
            )

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp(b'w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.indexfile
            )

        if sidedata is None:
            sidedata = {}
            flags = flags & ~REVIDX_SIDEDATA
        elif not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that does not support it")
            )
        else:
            flags |= REVIDX_SIDEDATA

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(
            self, text, flags, sidedata=sidedata
        )

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.indexfile, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
        )

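    # Minimal usage sketch (hypothetical transaction and parent nodes):
    #
    #   with repo.transaction(b'example') as tr:
    #       node = rl.addrevision(text, tr, linkrev, p1, p2)
    #
    # The returned node defaults to hash(text, p1, p2); adding a node that
    # is already stored is a no-op that simply returns it.
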
    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g. received
        over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp(b"a+")
        ifh = self._indexfp(b"a+")
        try:
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                ifh,
                dfh,
                deltacomputer=deltacomputer,
            )
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

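    # Header convention sketch for `compress`/`decompress` (from the logic
    # above): the returned (header, data) pair uses an empty header when the
    # compressor embeds its own marker (e.g. b'x' for zlib), b'u' for data
    # stored uncompressed, and no marker when the data already starts with
    # b'\0'; `decompress` routes on that first byte.
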
2061 def decompress(self, data):
2064 def decompress(self, data):
2062 """Decompress a revlog chunk.
2065 """Decompress a revlog chunk.
2063
2066
2064 The chunk is expected to begin with a header identifying the
2067 The chunk is expected to begin with a header identifying the
2065 format type so it can be routed to an appropriate decompressor.
2068 format type so it can be routed to an appropriate decompressor.
2066 """
2069 """
2067 if not data:
2070 if not data:
2068 return data
2071 return data
2069
2072
2070 # Revlogs are read much more frequently than they are written and many
2073 # Revlogs are read much more frequently than they are written and many
2071 # chunks only take microseconds to decompress, so performance is
2074 # chunks only take microseconds to decompress, so performance is
2072 # important here.
2075 # important here.
2073 #
2076 #
2074 # We can make a few assumptions about revlogs:
2077 # We can make a few assumptions about revlogs:
2075 #
2078 #
2076 # 1) the majority of chunks will be compressed (as opposed to inline
2079 # 1) the majority of chunks will be compressed (as opposed to inline
2077 # raw data).
2080 # raw data).
2078 # 2) decompressing *any* data will likely by at least 10x slower than
2081 # 2) decompressing *any* data will likely by at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_(b'unknown compression type %r') % t)

        return compressor.decompress(data)

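# Illustrative sketch, not from revlog.py: the header-byte dispatch performed
# by decompress() above, reduced to a standalone function. Only the zlib
# (b'x'), raw (b'\0') and uncompressed (b'u') cases are modeled; the fallback
# engine registry is a plain dict of hypothetical callables rather than
# util.compengines.
import zlib


def _sketch_decompress(data, extra_engines=None):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':  # b'x' (0x78) is the first byte of a default zlib stream
        return zlib.decompress(data)
    if t == b'\0':  # chunk stored raw; the NUL byte is part of the payload
        return data
    if t == b'u':  # stored uncompressed behind a one-byte marker
        return data[1:]
    engine = (extra_engines or {}).get(t)
    if engine is None:
        raise ValueError('unknown compression type %r' % t)
    return engine(data)


assert _sketch_decompress(zlib.compress(b'hello')) == b'hello'
assert _sketch_decompress(b'uhello') == b'hello'
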
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        ifh,
        dfh,
        alwayscache=False,
        deltacomputer=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.indexfile
            )
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.indexfile
            )

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
        )
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(
            transaction, ifh, dfh, entry, deltainfo.data, link, offset
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node

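# Illustrative sketch, not from revlog.py: the shape of the index entry tuple
# built in _addrevision() above. The field order mirrors the `e = (...)`
# tuple; the values are made up, and offset_type() is modeled here as the
# "offset shifted left 16 bits, OR'ed with the flags" packing.
def _sketch_offset_type(offset, flags):
    return (offset << 16) | flags


_sketch_entry = (
    _sketch_offset_type(1024, 0),  # byte offset and flags packed together
    120,           # compressed length of the stored delta or full text
    300,           # uncompressed length of the revision text (textlen)
    7,             # base revision of the delta chain (deltainfo.base)
    42,            # linkrev: changelog revision that introduced this entry
    7,             # p1 revision number
    -1,            # p2 revision number (-1 is the null revision)
    b'\x00' * 20,  # node hash
)
assert _sketch_entry[0] >> 16 == 1024
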
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

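# Illustrative sketch, not from revlog.py: the append-mode seek workaround
# described in _writeentry() above, demonstrated on a temporary file. The
# explicit seek(0, SEEK_END) before writing mirrors what the method does for
# ifh/dfh after the handle may have been repositioned by a read.
import os
import tempfile

_fd, _path = tempfile.mkstemp()
os.close(_fd)
with open(_path, 'a+b') as fh:
    fh.write(b'abc')
    fh.seek(0)               # a read elsewhere moved the file position...
    fh.read(1)
    fh.seek(0, os.SEEK_END)  # ...so reposition explicitly before appending
    fh.write(b'def')
with open(_path, 'rb') as fh:
    assert fh.read() == b'abcdef'
os.unlink(_path)
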
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp(b"a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp(b"a+")

        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(
                            p, self.indexfile, _(b'unknown parent')
                        )

                if deltabase not in self.nodemap:
                    raise error.LookupError(
                        deltabase, self.indexfile, _(b'unknown delta base')
                    )

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(b">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(
                            self.indexfile, self.node(baserev)
                        )

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(
                    node,
                    None,
                    transaction,
                    link,
                    p1,
                    p2,
                    flags,
                    (baserev, delta),
                    ifh,
                    dfh,
                    alwayscache=bool(addrevisioncb),
                    deltacomputer=deltacomputer,
                )

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp(b"a+")
                    ifh = self._indexfp(b"a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

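# Illustrative sketch, not from revlog.py: the 7-tuple addgroup() unpacks for
# each incoming delta. Field names follow the unpacking statement above; the
# values below are placeholders.
_sketch_delta = (
    b'\x11' * 20,  # node: hash of the revision being added
    b'\x22' * 20,  # p1: first parent node
    b'\x00' * 20,  # p2: second parent node (nullid when there is none)
    b'\x33' * 20,  # linknode: changelog node this revision belongs to
    b'\x22' * 20,  # deltabase: node the delta applies against
    b'...',        # delta: binary patch data
    0,             # flags: revision index flags (0 picks the defaults)
)
node, p1, p2, linknode, deltabase, delta, flags = _sketch_delta
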
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

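# Illustrative sketch, not from revlog.py: a simplified model of what
# getstrippoint() computes. The real storageutil.resolvestripinfo() walks
# down from the heads; this toy version assumes linkrev(r) is a plain list
# lookup over a linear history.
_linkrevs = [0, 1, 3, 2, 4]  # linkrev per revision in a toy revlog
_minlink = 3
_strippoint = min(r for r, lr in enumerate(_linkrevs) if lr >= _minlink)
# revisions at or above the strip point whose linkrev is still < minlink
# get their linkrevs "broken" by the truncation:
_broken = {
    r for r in range(_strippoint, len(_linkrevs)) if _linkrevs[r] < _minlink
}
assert (_strippoint, _broken) == (2, {3})
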
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

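# Illustrative sketch, not from revlog.py: the truncation offsets strip()
# computes above for the two storage layouts, using toy numbers. `io_size`
# stands in for self._io.size (64 bytes per entry in the v1 index format).
io_size = 64
rev = 5            # first revision to remove
data_start = 4096  # self.start(rev): byte offset of rev's data

# split layout (.i + .d): truncate the data file at data_start and the
# index file after `rev` fixed-size entries
index_trunc_split = rev * io_size

# inline layout (.i only): entries and data are interleaved, so the index
# truncation point is the data offset plus the entry bytes before `rev`
index_trunc_inline = data_start + rev * io_size
assert (index_trunc_split, index_trunc_inline) == (320, 4416)
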
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

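# Illustrative sketch, not from revlog.py: the (dd, di) health figures that
# checksize() returns, on toy numbers for a split (non-inline) revlog. A
# healthy data file ends exactly at end(tiprev) and a healthy index is a
# whole number of fixed-size entries; anything beyond counts as extra bytes.
io_size = 64                    # bytes per index entry
expected_data, actual_data = 4096, 4096
dd = actual_data - expected_data

actual_index = 5 * io_size + 3  # three stray bytes appended to the index
di = actual_index - (actual_index // io_size) * io_size
assert (dd, di) == (0, 3)
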
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedatacompanion=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).
        DELTAREUSEFULLADD
           Revisions will be re-added as if they were new content. This is
           slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
           e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the destination revlog's current setting is
        used.

        If not None, the `sidedatacompanion` is a callable that accepts two
        arguments:

            (srcrevlog, rev)

        and returns a triplet that controls changes to sidedata content from
        the old revision to the new clone result:

            (dropall, filterout, update)

        * if `dropall` is True, all sidedata should be dropped
        * `filterout` is a set of sidedata keys that should be dropped
        * `update` is a mapping of additional/new key -> value
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase controls whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedatacompanion,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

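# Illustrative sketch, not from revlog.py: a minimal sidedatacompanion
# callable of the shape clone() documents above. It drops one hypothetical
# sidedata key everywhere and adds nothing; the key name is made up.
def _sketch_sidedatacompanion(srcrevlog, rev):
    dropall = False                     # keep sidedata in general
    filterout = {b'hypothetical-key'}   # sidedata keys to remove
    update = {}                         # new key -> value pairs to add
    return (dropall, filterout, update)
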
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedatacompanion,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            sidedataactions = (False, [], {})
            if sidedatacompanion is not None:
                sidedataactions = sidedatacompanion(self, rev)

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
                dropall, filterout, update = sidedataactions
                text, sidedata = self._revisiondata(rev)
                if dropall:
                    sidedata = {}
                for key in filterout:
                    sidedata.pop(key, None)
                sidedata.update(update)
                if not sidedata:
                    sidedata = None
                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.rawdata(rev)

                ifh = destrevlog.opener(
                    destrevlog.indexfile, b'a+', checkambig=False
                )
                dfh = None
                if not destrevlog._inline:
                    dfh = destrevlog.opener(destrevlog.datafile, b'a+')
                try:
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        ifh,
                        dfh,
                        deltacomputer=deltacomputer,
                    )
                finally:
                    if dfh:
                        dfh.close()
                    ifh.close()

            if addrevisioncb:
                addrevisioncb(self, rev, node)

    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs') % self.version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self.indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self.datafile, location=b'store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

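# Illustrative sketch, not from revlog.py: the censor-tombstone framing used
# above. storageutil.packmeta() wraps metadata in the b'\x01\n' filelog
# header; this standalone version mimics that framing under the assumption
# that keys are sorted and rendered as "key: value" lines.
def _sketch_packmeta(meta, text):
    lines = b''.join(b'%s: %s\n' % (k, meta[k]) for k in sorted(meta))
    return b'\x01\n' + lines + b'\x01\n' + text


_tombstone = _sketch_packmeta({b'censored': b'reason'}, b'')
assert _tombstone == b'\x01\ncensored: reason\n\x01\n'
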
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.indexfile, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state[b'skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

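# Illustrative sketch, not from revlog.py: the skipflags masking performed in
# verifyintegrity() above. The verifier can ask to skip hash-checking
# revisions that carry certain flags; the flag value below is a hardcoded
# stand-in for a real REVIDX_* constant.
_SKETCH_REVIDX_EXTSTORED = 1 << 13

_skipflags = _SKETCH_REVIDX_EXTSTORED  # caller: skip externally stored revs
_revflags = _SKETCH_REVIDX_EXTSTORED   # flags on the revision being verified
if _skipflags & _revflags:
    pass  # the real method records the node in state[b'skipread'] instead
          # of reading and hash-checking the revision content
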
    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
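
# Illustrative sketch, not from revlog.py: the difference between the two
# size figures storageinfo() can report, with made-up numbers. trackedsize
# sums rawsize() over every revision (uncompressed history), while storedsize
# sums the on-disk file sizes (index entries plus compressed deltas), so
# storedsize is usually much smaller.
_rawsizes = [1000, 1020, 1040]    # rawsize(rev) for each revision
trackedsize = sum(_rawsizes)      # 3060 bytes of tracked content
storedsize = 64 * 3 + 180         # 3 index entries + compressed delta bytes
assert (trackedsize, storedsize) == (3060, 372)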