##// END OF EJS Templates
revlog: accept a revs argument in `headrevs`...
Boris Feld -
r41311:1421d048 default
parent child Browse files
Show More
@@ -1,555 +1,555 b''
1 # changelog.py - changelog class for mercurial
1 # changelog.py - changelog class for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 from .i18n import _
10 from .i18n import _
11 from .node import (
11 from .node import (
12 bin,
12 bin,
13 hex,
13 hex,
14 nullid,
14 nullid,
15 )
15 )
16 from .thirdparty import (
16 from .thirdparty import (
17 attr,
17 attr,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 encoding,
21 encoding,
22 error,
22 error,
23 pycompat,
23 pycompat,
24 revlog,
24 revlog,
25 )
25 )
26 from .utils import (
26 from .utils import (
27 dateutil,
27 dateutil,
28 stringutil,
28 stringutil,
29 )
29 )
30
30
31 _defaultextra = {'branch': 'default'}
31 _defaultextra = {'branch': 'default'}
32
32
33 def _string_escape(text):
33 def _string_escape(text):
34 """
34 """
35 >>> from .pycompat import bytechr as chr
35 >>> from .pycompat import bytechr as chr
36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
36 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
37 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
38 >>> s
38 >>> s
39 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
39 'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
40 >>> res = _string_escape(s)
40 >>> res = _string_escape(s)
41 >>> s == stringutil.unescapestr(res)
41 >>> s == stringutil.unescapestr(res)
42 True
42 True
43 """
43 """
44 # subset of the string_escape codec
44 # subset of the string_escape codec
45 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
45 text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
46 return text.replace('\0', '\\0')
46 return text.replace('\0', '\\0')
47
47
48 def decodeextra(text):
48 def decodeextra(text):
49 """
49 """
50 >>> from .pycompat import bytechr as chr
50 >>> from .pycompat import bytechr as chr
51 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
51 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
52 ... ).items())
52 ... ).items())
53 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
53 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
54 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
54 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
55 ... b'baz': chr(92) + chr(0) + b'2'})
55 ... b'baz': chr(92) + chr(0) + b'2'})
56 ... ).items())
56 ... ).items())
57 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
57 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
58 """
58 """
59 extra = _defaultextra.copy()
59 extra = _defaultextra.copy()
60 for l in text.split('\0'):
60 for l in text.split('\0'):
61 if l:
61 if l:
62 if '\\0' in l:
62 if '\\0' in l:
63 # fix up \0 without getting into trouble with \\0
63 # fix up \0 without getting into trouble with \\0
64 l = l.replace('\\\\', '\\\\\n')
64 l = l.replace('\\\\', '\\\\\n')
65 l = l.replace('\\0', '\0')
65 l = l.replace('\\0', '\0')
66 l = l.replace('\n', '')
66 l = l.replace('\n', '')
67 k, v = stringutil.unescapestr(l).split(':', 1)
67 k, v = stringutil.unescapestr(l).split(':', 1)
68 extra[k] = v
68 extra[k] = v
69 return extra
69 return extra
70
70
71 def encodeextra(d):
71 def encodeextra(d):
72 # keys must be sorted to produce a deterministic changelog entry
72 # keys must be sorted to produce a deterministic changelog entry
73 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
73 items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
74 return "\0".join(items)
74 return "\0".join(items)
75
75
76 def stripdesc(desc):
76 def stripdesc(desc):
77 """strip trailing whitespace and leading and trailing empty lines"""
77 """strip trailing whitespace and leading and trailing empty lines"""
78 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
78 return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')
79
79
80 class appender(object):
80 class appender(object):
81 '''the changelog index must be updated last on disk, so we use this class
81 '''the changelog index must be updated last on disk, so we use this class
82 to delay writes to it'''
82 to delay writes to it'''
83 def __init__(self, vfs, name, mode, buf):
83 def __init__(self, vfs, name, mode, buf):
84 self.data = buf
84 self.data = buf
85 fp = vfs(name, mode)
85 fp = vfs(name, mode)
86 self.fp = fp
86 self.fp = fp
87 self.offset = fp.tell()
87 self.offset = fp.tell()
88 self.size = vfs.fstat(fp).st_size
88 self.size = vfs.fstat(fp).st_size
89 self._end = self.size
89 self._end = self.size
90
90
91 def end(self):
91 def end(self):
92 return self._end
92 return self._end
93 def tell(self):
93 def tell(self):
94 return self.offset
94 return self.offset
95 def flush(self):
95 def flush(self):
96 pass
96 pass
97
97
98 @property
98 @property
99 def closed(self):
99 def closed(self):
100 return self.fp.closed
100 return self.fp.closed
101
101
102 def close(self):
102 def close(self):
103 self.fp.close()
103 self.fp.close()
104
104
105 def seek(self, offset, whence=0):
105 def seek(self, offset, whence=0):
106 '''virtual file offset spans real file and data'''
106 '''virtual file offset spans real file and data'''
107 if whence == 0:
107 if whence == 0:
108 self.offset = offset
108 self.offset = offset
109 elif whence == 1:
109 elif whence == 1:
110 self.offset += offset
110 self.offset += offset
111 elif whence == 2:
111 elif whence == 2:
112 self.offset = self.end() + offset
112 self.offset = self.end() + offset
113 if self.offset < self.size:
113 if self.offset < self.size:
114 self.fp.seek(self.offset)
114 self.fp.seek(self.offset)
115
115
116 def read(self, count=-1):
116 def read(self, count=-1):
117 '''only trick here is reads that span real file and data'''
117 '''only trick here is reads that span real file and data'''
118 ret = ""
118 ret = ""
119 if self.offset < self.size:
119 if self.offset < self.size:
120 s = self.fp.read(count)
120 s = self.fp.read(count)
121 ret = s
121 ret = s
122 self.offset += len(s)
122 self.offset += len(s)
123 if count > 0:
123 if count > 0:
124 count -= len(s)
124 count -= len(s)
125 if count != 0:
125 if count != 0:
126 doff = self.offset - self.size
126 doff = self.offset - self.size
127 self.data.insert(0, "".join(self.data))
127 self.data.insert(0, "".join(self.data))
128 del self.data[1:]
128 del self.data[1:]
129 s = self.data[0][doff:doff + count]
129 s = self.data[0][doff:doff + count]
130 self.offset += len(s)
130 self.offset += len(s)
131 ret += s
131 ret += s
132 return ret
132 return ret
133
133
134 def write(self, s):
134 def write(self, s):
135 self.data.append(bytes(s))
135 self.data.append(bytes(s))
136 self.offset += len(s)
136 self.offset += len(s)
137 self._end += len(s)
137 self._end += len(s)
138
138
139 def __enter__(self):
139 def __enter__(self):
140 self.fp.__enter__()
140 self.fp.__enter__()
141 return self
141 return self
142
142
143 def __exit__(self, *args):
143 def __exit__(self, *args):
144 return self.fp.__exit__(*args)
144 return self.fp.__exit__(*args)
145
145
146 def _divertopener(opener, target):
146 def _divertopener(opener, target):
147 """build an opener that writes in 'target.a' instead of 'target'"""
147 """build an opener that writes in 'target.a' instead of 'target'"""
148 def _divert(name, mode='r', checkambig=False):
148 def _divert(name, mode='r', checkambig=False):
149 if name != target:
149 if name != target:
150 return opener(name, mode)
150 return opener(name, mode)
151 return opener(name + ".a", mode)
151 return opener(name + ".a", mode)
152 return _divert
152 return _divert
153
153
154 def _delayopener(opener, target, buf):
154 def _delayopener(opener, target, buf):
155 """build an opener that stores chunks in 'buf' instead of 'target'"""
155 """build an opener that stores chunks in 'buf' instead of 'target'"""
156 def _delay(name, mode='r', checkambig=False):
156 def _delay(name, mode='r', checkambig=False):
157 if name != target:
157 if name != target:
158 return opener(name, mode)
158 return opener(name, mode)
159 return appender(opener, name, mode, buf)
159 return appender(opener, name, mode, buf)
160 return _delay
160 return _delay
161
161
162 @attr.s
162 @attr.s
163 class _changelogrevision(object):
163 class _changelogrevision(object):
164 # Extensions might modify _defaultextra, so let the constructor below pass
164 # Extensions might modify _defaultextra, so let the constructor below pass
165 # it in
165 # it in
166 extra = attr.ib()
166 extra = attr.ib()
167 manifest = attr.ib(default=nullid)
167 manifest = attr.ib(default=nullid)
168 user = attr.ib(default='')
168 user = attr.ib(default='')
169 date = attr.ib(default=(0, 0))
169 date = attr.ib(default=(0, 0))
170 files = attr.ib(default=attr.Factory(list))
170 files = attr.ib(default=attr.Factory(list))
171 description = attr.ib(default='')
171 description = attr.ib(default='')
172
172
173 class changelogrevision(object):
173 class changelogrevision(object):
174 """Holds results of a parsed changelog revision.
174 """Holds results of a parsed changelog revision.
175
175
176 Changelog revisions consist of multiple pieces of data, including
176 Changelog revisions consist of multiple pieces of data, including
177 the manifest node, user, and date. This object exposes a view into
177 the manifest node, user, and date. This object exposes a view into
178 the parsed object.
178 the parsed object.
179 """
179 """
180
180
181 __slots__ = (
181 __slots__ = (
182 u'_offsets',
182 u'_offsets',
183 u'_text',
183 u'_text',
184 )
184 )
185
185
186 def __new__(cls, text):
186 def __new__(cls, text):
187 if not text:
187 if not text:
188 return _changelogrevision(extra=_defaultextra)
188 return _changelogrevision(extra=_defaultextra)
189
189
190 self = super(changelogrevision, cls).__new__(cls)
190 self = super(changelogrevision, cls).__new__(cls)
191 # We could return here and implement the following as an __init__.
191 # We could return here and implement the following as an __init__.
192 # But doing it here is equivalent and saves an extra function call.
192 # But doing it here is equivalent and saves an extra function call.
193
193
194 # format used:
194 # format used:
195 # nodeid\n : manifest node in ascii
195 # nodeid\n : manifest node in ascii
196 # user\n : user, no \n or \r allowed
196 # user\n : user, no \n or \r allowed
197 # time tz extra\n : date (time is int or float, timezone is int)
197 # time tz extra\n : date (time is int or float, timezone is int)
198 # : extra is metadata, encoded and separated by '\0'
198 # : extra is metadata, encoded and separated by '\0'
199 # : older versions ignore it
199 # : older versions ignore it
200 # files\n\n : files modified by the cset, no \n or \r allowed
200 # files\n\n : files modified by the cset, no \n or \r allowed
201 # (.*) : comment (free text, ideally utf-8)
201 # (.*) : comment (free text, ideally utf-8)
202 #
202 #
203 # changelog v0 doesn't use extra
203 # changelog v0 doesn't use extra
204
204
205 nl1 = text.index('\n')
205 nl1 = text.index('\n')
206 nl2 = text.index('\n', nl1 + 1)
206 nl2 = text.index('\n', nl1 + 1)
207 nl3 = text.index('\n', nl2 + 1)
207 nl3 = text.index('\n', nl2 + 1)
208
208
209 # The list of files may be empty. Which means nl3 is the first of the
209 # The list of files may be empty. Which means nl3 is the first of the
210 # double newline that precedes the description.
210 # double newline that precedes the description.
211 if text[nl3 + 1:nl3 + 2] == '\n':
211 if text[nl3 + 1:nl3 + 2] == '\n':
212 doublenl = nl3
212 doublenl = nl3
213 else:
213 else:
214 doublenl = text.index('\n\n', nl3 + 1)
214 doublenl = text.index('\n\n', nl3 + 1)
215
215
216 self._offsets = (nl1, nl2, nl3, doublenl)
216 self._offsets = (nl1, nl2, nl3, doublenl)
217 self._text = text
217 self._text = text
218
218
219 return self
219 return self
220
220
221 @property
221 @property
222 def manifest(self):
222 def manifest(self):
223 return bin(self._text[0:self._offsets[0]])
223 return bin(self._text[0:self._offsets[0]])
224
224
225 @property
225 @property
226 def user(self):
226 def user(self):
227 off = self._offsets
227 off = self._offsets
228 return encoding.tolocal(self._text[off[0] + 1:off[1]])
228 return encoding.tolocal(self._text[off[0] + 1:off[1]])
229
229
230 @property
230 @property
231 def _rawdate(self):
231 def _rawdate(self):
232 off = self._offsets
232 off = self._offsets
233 dateextra = self._text[off[1] + 1:off[2]]
233 dateextra = self._text[off[1] + 1:off[2]]
234 return dateextra.split(' ', 2)[0:2]
234 return dateextra.split(' ', 2)[0:2]
235
235
236 @property
236 @property
237 def _rawextra(self):
237 def _rawextra(self):
238 off = self._offsets
238 off = self._offsets
239 dateextra = self._text[off[1] + 1:off[2]]
239 dateextra = self._text[off[1] + 1:off[2]]
240 fields = dateextra.split(' ', 2)
240 fields = dateextra.split(' ', 2)
241 if len(fields) != 3:
241 if len(fields) != 3:
242 return None
242 return None
243
243
244 return fields[2]
244 return fields[2]
245
245
246 @property
246 @property
247 def date(self):
247 def date(self):
248 raw = self._rawdate
248 raw = self._rawdate
249 time = float(raw[0])
249 time = float(raw[0])
250 # Various tools did silly things with the timezone.
250 # Various tools did silly things with the timezone.
251 try:
251 try:
252 timezone = int(raw[1])
252 timezone = int(raw[1])
253 except ValueError:
253 except ValueError:
254 timezone = 0
254 timezone = 0
255
255
256 return time, timezone
256 return time, timezone
257
257
258 @property
258 @property
259 def extra(self):
259 def extra(self):
260 raw = self._rawextra
260 raw = self._rawextra
261 if raw is None:
261 if raw is None:
262 return _defaultextra
262 return _defaultextra
263
263
264 return decodeextra(raw)
264 return decodeextra(raw)
265
265
266 @property
266 @property
267 def files(self):
267 def files(self):
268 off = self._offsets
268 off = self._offsets
269 if off[2] == off[3]:
269 if off[2] == off[3]:
270 return []
270 return []
271
271
272 return self._text[off[2] + 1:off[3]].split('\n')
272 return self._text[off[2] + 1:off[3]].split('\n')
273
273
274 @property
274 @property
275 def description(self):
275 def description(self):
276 return encoding.tolocal(self._text[self._offsets[3] + 2:])
276 return encoding.tolocal(self._text[self._offsets[3] + 2:])
277
277
278 class changelog(revlog.revlog):
278 class changelog(revlog.revlog):
279 def __init__(self, opener, trypending=False):
279 def __init__(self, opener, trypending=False):
280 """Load a changelog revlog using an opener.
280 """Load a changelog revlog using an opener.
281
281
282 If ``trypending`` is true, we attempt to load the index from a
282 If ``trypending`` is true, we attempt to load the index from a
283 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
283 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
284 The ``00changelog.i.a`` file contains index (and possibly inline
284 The ``00changelog.i.a`` file contains index (and possibly inline
285 revision) data for a transaction that hasn't been finalized yet.
285 revision) data for a transaction that hasn't been finalized yet.
286 It exists in a separate file to facilitate readers (such as
286 It exists in a separate file to facilitate readers (such as
287 hooks processes) accessing data before a transaction is finalized.
287 hooks processes) accessing data before a transaction is finalized.
288 """
288 """
289 if trypending and opener.exists('00changelog.i.a'):
289 if trypending and opener.exists('00changelog.i.a'):
290 indexfile = '00changelog.i.a'
290 indexfile = '00changelog.i.a'
291 else:
291 else:
292 indexfile = '00changelog.i'
292 indexfile = '00changelog.i'
293
293
294 datafile = '00changelog.d'
294 datafile = '00changelog.d'
295 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
295 revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
296 checkambig=True, mmaplargeindex=True)
296 checkambig=True, mmaplargeindex=True)
297
297
298 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
298 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
299 # changelogs don't benefit from generaldelta.
299 # changelogs don't benefit from generaldelta.
300
300
301 self.version &= ~revlog.FLAG_GENERALDELTA
301 self.version &= ~revlog.FLAG_GENERALDELTA
302 self._generaldelta = False
302 self._generaldelta = False
303
303
304 # Delta chains for changelogs tend to be very small because entries
304 # Delta chains for changelogs tend to be very small because entries
305 # tend to be small and don't delta well with each. So disable delta
305 # tend to be small and don't delta well with each. So disable delta
306 # chains.
306 # chains.
307 self._storedeltachains = False
307 self._storedeltachains = False
308
308
309 self._realopener = opener
309 self._realopener = opener
310 self._delayed = False
310 self._delayed = False
311 self._delaybuf = None
311 self._delaybuf = None
312 self._divert = False
312 self._divert = False
313 self.filteredrevs = frozenset()
313 self.filteredrevs = frozenset()
314
314
315 def tiprev(self):
315 def tiprev(self):
316 for i in pycompat.xrange(len(self) -1, -2, -1):
316 for i in pycompat.xrange(len(self) -1, -2, -1):
317 if i not in self.filteredrevs:
317 if i not in self.filteredrevs:
318 return i
318 return i
319
319
320 def tip(self):
320 def tip(self):
321 """filtered version of revlog.tip"""
321 """filtered version of revlog.tip"""
322 return self.node(self.tiprev())
322 return self.node(self.tiprev())
323
323
324 def __contains__(self, rev):
324 def __contains__(self, rev):
325 """filtered version of revlog.__contains__"""
325 """filtered version of revlog.__contains__"""
326 return (0 <= rev < len(self)
326 return (0 <= rev < len(self)
327 and rev not in self.filteredrevs)
327 and rev not in self.filteredrevs)
328
328
329 def __iter__(self):
329 def __iter__(self):
330 """filtered version of revlog.__iter__"""
330 """filtered version of revlog.__iter__"""
331 if len(self.filteredrevs) == 0:
331 if len(self.filteredrevs) == 0:
332 return revlog.revlog.__iter__(self)
332 return revlog.revlog.__iter__(self)
333
333
334 def filterediter():
334 def filterediter():
335 for i in pycompat.xrange(len(self)):
335 for i in pycompat.xrange(len(self)):
336 if i not in self.filteredrevs:
336 if i not in self.filteredrevs:
337 yield i
337 yield i
338
338
339 return filterediter()
339 return filterediter()
340
340
341 def revs(self, start=0, stop=None):
341 def revs(self, start=0, stop=None):
342 """filtered version of revlog.revs"""
342 """filtered version of revlog.revs"""
343 for i in super(changelog, self).revs(start, stop):
343 for i in super(changelog, self).revs(start, stop):
344 if i not in self.filteredrevs:
344 if i not in self.filteredrevs:
345 yield i
345 yield i
346
346
347 def reachableroots(self, minroot, heads, roots, includepath=False):
347 def reachableroots(self, minroot, heads, roots, includepath=False):
348 return self.index.reachableroots2(minroot, heads, roots, includepath)
348 return self.index.reachableroots2(minroot, heads, roots, includepath)
349
349
350 def headrevs(self):
350 def headrevs(self, revs=None):
351 if self.filteredrevs:
351 if revs is None and self.filteredrevs:
352 try:
352 try:
353 return self.index.headrevsfiltered(self.filteredrevs)
353 return self.index.headrevsfiltered(self.filteredrevs)
354 # AttributeError covers non-c-extension environments and
354 # AttributeError covers non-c-extension environments and
355 # old c extensions without filter handling.
355 # old c extensions without filter handling.
356 except AttributeError:
356 except AttributeError:
357 return self._headrevs()
357 return self._headrevs()
358
358
359 return super(changelog, self).headrevs()
359 return super(changelog, self).headrevs(revs)
360
360
361 def strip(self, *args, **kwargs):
361 def strip(self, *args, **kwargs):
362 # XXX make something better than assert
362 # XXX make something better than assert
363 # We can't expect proper strip behavior if we are filtered.
363 # We can't expect proper strip behavior if we are filtered.
364 assert not self.filteredrevs
364 assert not self.filteredrevs
365 super(changelog, self).strip(*args, **kwargs)
365 super(changelog, self).strip(*args, **kwargs)
366
366
367 def rev(self, node):
367 def rev(self, node):
368 """filtered version of revlog.rev"""
368 """filtered version of revlog.rev"""
369 r = super(changelog, self).rev(node)
369 r = super(changelog, self).rev(node)
370 if r in self.filteredrevs:
370 if r in self.filteredrevs:
371 raise error.FilteredLookupError(hex(node), self.indexfile,
371 raise error.FilteredLookupError(hex(node), self.indexfile,
372 _('filtered node'))
372 _('filtered node'))
373 return r
373 return r
374
374
375 def node(self, rev):
375 def node(self, rev):
376 """filtered version of revlog.node"""
376 """filtered version of revlog.node"""
377 if rev in self.filteredrevs:
377 if rev in self.filteredrevs:
378 raise error.FilteredIndexError(rev)
378 raise error.FilteredIndexError(rev)
379 return super(changelog, self).node(rev)
379 return super(changelog, self).node(rev)
380
380
381 def linkrev(self, rev):
381 def linkrev(self, rev):
382 """filtered version of revlog.linkrev"""
382 """filtered version of revlog.linkrev"""
383 if rev in self.filteredrevs:
383 if rev in self.filteredrevs:
384 raise error.FilteredIndexError(rev)
384 raise error.FilteredIndexError(rev)
385 return super(changelog, self).linkrev(rev)
385 return super(changelog, self).linkrev(rev)
386
386
387 def parentrevs(self, rev):
387 def parentrevs(self, rev):
388 """filtered version of revlog.parentrevs"""
388 """filtered version of revlog.parentrevs"""
389 if rev in self.filteredrevs:
389 if rev in self.filteredrevs:
390 raise error.FilteredIndexError(rev)
390 raise error.FilteredIndexError(rev)
391 return super(changelog, self).parentrevs(rev)
391 return super(changelog, self).parentrevs(rev)
392
392
393 def flags(self, rev):
393 def flags(self, rev):
394 """filtered version of revlog.flags"""
394 """filtered version of revlog.flags"""
395 if rev in self.filteredrevs:
395 if rev in self.filteredrevs:
396 raise error.FilteredIndexError(rev)
396 raise error.FilteredIndexError(rev)
397 return super(changelog, self).flags(rev)
397 return super(changelog, self).flags(rev)
398
398
399 def delayupdate(self, tr):
399 def delayupdate(self, tr):
400 "delay visibility of index updates to other readers"
400 "delay visibility of index updates to other readers"
401
401
402 if not self._delayed:
402 if not self._delayed:
403 if len(self) == 0:
403 if len(self) == 0:
404 self._divert = True
404 self._divert = True
405 if self._realopener.exists(self.indexfile + '.a'):
405 if self._realopener.exists(self.indexfile + '.a'):
406 self._realopener.unlink(self.indexfile + '.a')
406 self._realopener.unlink(self.indexfile + '.a')
407 self.opener = _divertopener(self._realopener, self.indexfile)
407 self.opener = _divertopener(self._realopener, self.indexfile)
408 else:
408 else:
409 self._delaybuf = []
409 self._delaybuf = []
410 self.opener = _delayopener(self._realopener, self.indexfile,
410 self.opener = _delayopener(self._realopener, self.indexfile,
411 self._delaybuf)
411 self._delaybuf)
412 self._delayed = True
412 self._delayed = True
413 tr.addpending('cl-%i' % id(self), self._writepending)
413 tr.addpending('cl-%i' % id(self), self._writepending)
414 tr.addfinalize('cl-%i' % id(self), self._finalize)
414 tr.addfinalize('cl-%i' % id(self), self._finalize)
415
415
416 def _finalize(self, tr):
416 def _finalize(self, tr):
417 "finalize index updates"
417 "finalize index updates"
418 self._delayed = False
418 self._delayed = False
419 self.opener = self._realopener
419 self.opener = self._realopener
420 # move redirected index data back into place
420 # move redirected index data back into place
421 if self._divert:
421 if self._divert:
422 assert not self._delaybuf
422 assert not self._delaybuf
423 tmpname = self.indexfile + ".a"
423 tmpname = self.indexfile + ".a"
424 nfile = self.opener.open(tmpname)
424 nfile = self.opener.open(tmpname)
425 nfile.close()
425 nfile.close()
426 self.opener.rename(tmpname, self.indexfile, checkambig=True)
426 self.opener.rename(tmpname, self.indexfile, checkambig=True)
427 elif self._delaybuf:
427 elif self._delaybuf:
428 fp = self.opener(self.indexfile, 'a', checkambig=True)
428 fp = self.opener(self.indexfile, 'a', checkambig=True)
429 fp.write("".join(self._delaybuf))
429 fp.write("".join(self._delaybuf))
430 fp.close()
430 fp.close()
431 self._delaybuf = None
431 self._delaybuf = None
432 self._divert = False
432 self._divert = False
433 # split when we're done
433 # split when we're done
434 self._enforceinlinesize(tr)
434 self._enforceinlinesize(tr)
435
435
436 def _writepending(self, tr):
436 def _writepending(self, tr):
437 "create a file containing the unfinalized state for pretxnchangegroup"
437 "create a file containing the unfinalized state for pretxnchangegroup"
438 if self._delaybuf:
438 if self._delaybuf:
439 # make a temporary copy of the index
439 # make a temporary copy of the index
440 fp1 = self._realopener(self.indexfile)
440 fp1 = self._realopener(self.indexfile)
441 pendingfilename = self.indexfile + ".a"
441 pendingfilename = self.indexfile + ".a"
442 # register as a temp file to ensure cleanup on failure
442 # register as a temp file to ensure cleanup on failure
443 tr.registertmp(pendingfilename)
443 tr.registertmp(pendingfilename)
444 # write existing data
444 # write existing data
445 fp2 = self._realopener(pendingfilename, "w")
445 fp2 = self._realopener(pendingfilename, "w")
446 fp2.write(fp1.read())
446 fp2.write(fp1.read())
447 # add pending data
447 # add pending data
448 fp2.write("".join(self._delaybuf))
448 fp2.write("".join(self._delaybuf))
449 fp2.close()
449 fp2.close()
450 # switch modes so finalize can simply rename
450 # switch modes so finalize can simply rename
451 self._delaybuf = None
451 self._delaybuf = None
452 self._divert = True
452 self._divert = True
453 self.opener = _divertopener(self._realopener, self.indexfile)
453 self.opener = _divertopener(self._realopener, self.indexfile)
454
454
455 if self._divert:
455 if self._divert:
456 return True
456 return True
457
457
458 return False
458 return False
459
459
460 def _enforceinlinesize(self, tr, fp=None):
460 def _enforceinlinesize(self, tr, fp=None):
461 if not self._delayed:
461 if not self._delayed:
462 revlog.revlog._enforceinlinesize(self, tr, fp)
462 revlog.revlog._enforceinlinesize(self, tr, fp)
463
463
464 def read(self, node):
464 def read(self, node):
465 """Obtain data from a parsed changelog revision.
465 """Obtain data from a parsed changelog revision.
466
466
467 Returns a 6-tuple of:
467 Returns a 6-tuple of:
468
468
469 - manifest node in binary
469 - manifest node in binary
470 - author/user as a localstr
470 - author/user as a localstr
471 - date as a 2-tuple of (time, timezone)
471 - date as a 2-tuple of (time, timezone)
472 - list of files
472 - list of files
473 - commit message as a localstr
473 - commit message as a localstr
474 - dict of extra metadata
474 - dict of extra metadata
475
475
476 Unless you need to access all fields, consider calling
476 Unless you need to access all fields, consider calling
477 ``changelogrevision`` instead, as it is faster for partial object
477 ``changelogrevision`` instead, as it is faster for partial object
478 access.
478 access.
479 """
479 """
480 c = changelogrevision(self.revision(node))
480 c = changelogrevision(self.revision(node))
481 return (
481 return (
482 c.manifest,
482 c.manifest,
483 c.user,
483 c.user,
484 c.date,
484 c.date,
485 c.files,
485 c.files,
486 c.description,
486 c.description,
487 c.extra
487 c.extra
488 )
488 )
489
489
490 def changelogrevision(self, nodeorrev):
490 def changelogrevision(self, nodeorrev):
491 """Obtain a ``changelogrevision`` for a node or revision."""
491 """Obtain a ``changelogrevision`` for a node or revision."""
492 return changelogrevision(self.revision(nodeorrev))
492 return changelogrevision(self.revision(nodeorrev))
493
493
494 def readfiles(self, node):
494 def readfiles(self, node):
495 """
495 """
496 short version of read that only returns the files modified by the cset
496 short version of read that only returns the files modified by the cset
497 """
497 """
498 text = self.revision(node)
498 text = self.revision(node)
499 if not text:
499 if not text:
500 return []
500 return []
501 last = text.index("\n\n")
501 last = text.index("\n\n")
502 l = text[:last].split('\n')
502 l = text[:last].split('\n')
503 return l[3:]
503 return l[3:]
504
504
505 def add(self, manifest, files, desc, transaction, p1, p2,
505 def add(self, manifest, files, desc, transaction, p1, p2,
506 user, date=None, extra=None):
506 user, date=None, extra=None):
507 # Convert to UTF-8 encoded bytestrings as the very first
507 # Convert to UTF-8 encoded bytestrings as the very first
508 # thing: calling any method on a localstr object will turn it
508 # thing: calling any method on a localstr object will turn it
509 # into a str object and the cached UTF-8 string is thus lost.
509 # into a str object and the cached UTF-8 string is thus lost.
510 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
510 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
511
511
512 user = user.strip()
512 user = user.strip()
513 # An empty username or a username with a "\n" will make the
513 # An empty username or a username with a "\n" will make the
514 # revision text contain two "\n\n" sequences -> corrupt
514 # revision text contain two "\n\n" sequences -> corrupt
515 # repository since read cannot unpack the revision.
515 # repository since read cannot unpack the revision.
516 if not user:
516 if not user:
517 raise error.StorageError(_("empty username"))
517 raise error.StorageError(_("empty username"))
518 if "\n" in user:
518 if "\n" in user:
519 raise error.StorageError(_("username %r contains a newline")
519 raise error.StorageError(_("username %r contains a newline")
520 % pycompat.bytestr(user))
520 % pycompat.bytestr(user))
521
521
522 desc = stripdesc(desc)
522 desc = stripdesc(desc)
523
523
524 if date:
524 if date:
525 parseddate = "%d %d" % dateutil.parsedate(date)
525 parseddate = "%d %d" % dateutil.parsedate(date)
526 else:
526 else:
527 parseddate = "%d %d" % dateutil.makedate()
527 parseddate = "%d %d" % dateutil.makedate()
528 if extra:
528 if extra:
529 branch = extra.get("branch")
529 branch = extra.get("branch")
530 if branch in ("default", ""):
530 if branch in ("default", ""):
531 del extra["branch"]
531 del extra["branch"]
532 elif branch in (".", "null", "tip"):
532 elif branch in (".", "null", "tip"):
533 raise error.StorageError(_('the name \'%s\' is reserved')
533 raise error.StorageError(_('the name \'%s\' is reserved')
534 % branch)
534 % branch)
535 if extra:
535 if extra:
536 extra = encodeextra(extra)
536 extra = encodeextra(extra)
537 parseddate = "%s %s" % (parseddate, extra)
537 parseddate = "%s %s" % (parseddate, extra)
538 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
538 l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
539 text = "\n".join(l)
539 text = "\n".join(l)
540 return self.addrevision(text, transaction, len(self), p1, p2)
540 return self.addrevision(text, transaction, len(self), p1, p2)
541
541
542 def branchinfo(self, rev):
542 def branchinfo(self, rev):
543 """return the branch name and open/close state of a revision
543 """return the branch name and open/close state of a revision
544
544
545 This function exists because creating a changectx object
545 This function exists because creating a changectx object
546 just to access this is costly."""
546 just to access this is costly."""
547 extra = self.read(rev)[5]
547 extra = self.read(rev)[5]
548 return encoding.tolocal(extra.get("branch")), 'close' in extra
548 return encoding.tolocal(extra.get("branch")), 'close' in extra
549
549
550 def _nodeduplicatecallback(self, transaction, node):
550 def _nodeduplicatecallback(self, transaction, node):
551 # keep track of revisions that got "re-added", eg: unbunde of know rev.
551 # keep track of revisions that got "re-added", eg: unbunde of know rev.
552 #
552 #
553 # We track them in a list to preserve their order from the source bundle
553 # We track them in a list to preserve their order from the source bundle
554 duplicates = transaction.changes.setdefault('revduplicates', [])
554 duplicates = transaction.changes.setdefault('revduplicates', [])
555 duplicates.append(self.rev(node))
555 duplicates.append(self.rev(node))
@@ -1,2637 +1,2639 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import collections
16 import collections
17 import contextlib
17 import contextlib
18 import errno
18 import errno
19 import os
19 import os
20 import struct
20 import struct
21 import zlib
21 import zlib
22
22
23 # import stuff from node for others to import from revlog
23 # import stuff from node for others to import from revlog
24 from .node import (
24 from .node import (
25 bin,
25 bin,
26 hex,
26 hex,
27 nullhex,
27 nullhex,
28 nullid,
28 nullid,
29 nullrev,
29 nullrev,
30 short,
30 short,
31 wdirfilenodeids,
31 wdirfilenodeids,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .revlogutils.constants import (
37 from .revlogutils.constants import (
38 FLAG_GENERALDELTA,
38 FLAG_GENERALDELTA,
39 FLAG_INLINE_DATA,
39 FLAG_INLINE_DATA,
40 REVIDX_DEFAULT_FLAGS,
40 REVIDX_DEFAULT_FLAGS,
41 REVIDX_ELLIPSIS,
41 REVIDX_ELLIPSIS,
42 REVIDX_EXTSTORED,
42 REVIDX_EXTSTORED,
43 REVIDX_FLAGS_ORDER,
43 REVIDX_FLAGS_ORDER,
44 REVIDX_ISCENSORED,
44 REVIDX_ISCENSORED,
45 REVIDX_KNOWN_FLAGS,
45 REVIDX_KNOWN_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 REVLOGV0,
47 REVLOGV0,
48 REVLOGV1,
48 REVLOGV1,
49 REVLOGV1_FLAGS,
49 REVLOGV1_FLAGS,
50 REVLOGV2,
50 REVLOGV2,
51 REVLOGV2_FLAGS,
51 REVLOGV2_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
52 REVLOG_DEFAULT_FLAGS,
53 REVLOG_DEFAULT_FORMAT,
53 REVLOG_DEFAULT_FORMAT,
54 REVLOG_DEFAULT_VERSION,
54 REVLOG_DEFAULT_VERSION,
55 )
55 )
56 from .thirdparty import (
56 from .thirdparty import (
57 attr,
57 attr,
58 )
58 )
59 from . import (
59 from . import (
60 ancestor,
60 ancestor,
61 dagop,
61 dagop,
62 error,
62 error,
63 mdiff,
63 mdiff,
64 policy,
64 policy,
65 pycompat,
65 pycompat,
66 repository,
66 repository,
67 templatefilters,
67 templatefilters,
68 util,
68 util,
69 )
69 )
70 from .revlogutils import (
70 from .revlogutils import (
71 deltas as deltautil,
71 deltas as deltautil,
72 )
72 )
73 from .utils import (
73 from .utils import (
74 interfaceutil,
74 interfaceutil,
75 storageutil,
75 storageutil,
76 stringutil,
76 stringutil,
77 )
77 )
78
78
79 # blanked usage of all the name to prevent pyflakes constraints
79 # blanked usage of all the name to prevent pyflakes constraints
80 # We need these name available in the module for extensions.
80 # We need these name available in the module for extensions.
81 REVLOGV0
81 REVLOGV0
82 REVLOGV1
82 REVLOGV1
83 REVLOGV2
83 REVLOGV2
84 FLAG_INLINE_DATA
84 FLAG_INLINE_DATA
85 FLAG_GENERALDELTA
85 FLAG_GENERALDELTA
86 REVLOG_DEFAULT_FLAGS
86 REVLOG_DEFAULT_FLAGS
87 REVLOG_DEFAULT_FORMAT
87 REVLOG_DEFAULT_FORMAT
88 REVLOG_DEFAULT_VERSION
88 REVLOG_DEFAULT_VERSION
89 REVLOGV1_FLAGS
89 REVLOGV1_FLAGS
90 REVLOGV2_FLAGS
90 REVLOGV2_FLAGS
91 REVIDX_ISCENSORED
91 REVIDX_ISCENSORED
92 REVIDX_ELLIPSIS
92 REVIDX_ELLIPSIS
93 REVIDX_EXTSTORED
93 REVIDX_EXTSTORED
94 REVIDX_DEFAULT_FLAGS
94 REVIDX_DEFAULT_FLAGS
95 REVIDX_FLAGS_ORDER
95 REVIDX_FLAGS_ORDER
96 REVIDX_KNOWN_FLAGS
96 REVIDX_KNOWN_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
97 REVIDX_RAWTEXT_CHANGING_FLAGS
98
98
99 parsers = policy.importmod(r'parsers')
99 parsers = policy.importmod(r'parsers')
100 try:
100 try:
101 from . import rustext
101 from . import rustext
102 rustext.__name__ # force actual import (see hgdemandimport)
102 rustext.__name__ # force actual import (see hgdemandimport)
103 except ImportError:
103 except ImportError:
104 rustext = None
104 rustext = None
105
105
106 # Aliased for performance.
106 # Aliased for performance.
107 _zlibdecompress = zlib.decompress
107 _zlibdecompress = zlib.decompress
108
108
109 # max size of revlog with inline data
109 # max size of revlog with inline data
110 _maxinline = 131072
110 _maxinline = 131072
111 _chunksize = 1048576
111 _chunksize = 1048576
112
112
113 # Store flag processors (cf. 'addflagprocessor()' to register)
113 # Store flag processors (cf. 'addflagprocessor()' to register)
114 _flagprocessors = {
114 _flagprocessors = {
115 REVIDX_ISCENSORED: None,
115 REVIDX_ISCENSORED: None,
116 }
116 }
117
117
118 # Flag processors for REVIDX_ELLIPSIS.
118 # Flag processors for REVIDX_ELLIPSIS.
119 def ellipsisreadprocessor(rl, text):
119 def ellipsisreadprocessor(rl, text):
120 return text, False
120 return text, False
121
121
122 def ellipsiswriteprocessor(rl, text):
122 def ellipsiswriteprocessor(rl, text):
123 return text, False
123 return text, False
124
124
125 def ellipsisrawprocessor(rl, text):
125 def ellipsisrawprocessor(rl, text):
126 return False
126 return False
127
127
128 ellipsisprocessor = (
128 ellipsisprocessor = (
129 ellipsisreadprocessor,
129 ellipsisreadprocessor,
130 ellipsiswriteprocessor,
130 ellipsiswriteprocessor,
131 ellipsisrawprocessor,
131 ellipsisrawprocessor,
132 )
132 )
133
133
134 def addflagprocessor(flag, processor):
134 def addflagprocessor(flag, processor):
135 """Register a flag processor on a revision data flag.
135 """Register a flag processor on a revision data flag.
136
136
137 Invariant:
137 Invariant:
138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
138 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
139 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
140 - Only one flag processor can be registered on a specific flag.
140 - Only one flag processor can be registered on a specific flag.
141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
141 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
142 following signatures:
142 following signatures:
143 - (read) f(self, rawtext) -> text, bool
143 - (read) f(self, rawtext) -> text, bool
144 - (write) f(self, text) -> rawtext, bool
144 - (write) f(self, text) -> rawtext, bool
145 - (raw) f(self, rawtext) -> bool
145 - (raw) f(self, rawtext) -> bool
146 "text" is presented to the user. "rawtext" is stored in revlog data, not
146 "text" is presented to the user. "rawtext" is stored in revlog data, not
147 directly visible to the user.
147 directly visible to the user.
148 The boolean returned by these transforms is used to determine whether
148 The boolean returned by these transforms is used to determine whether
149 the returned text can be used for hash integrity checking. For example,
149 the returned text can be used for hash integrity checking. For example,
150 if "write" returns False, then "text" is used to generate hash. If
150 if "write" returns False, then "text" is used to generate hash. If
151 "write" returns True, that basically means "rawtext" returned by "write"
151 "write" returns True, that basically means "rawtext" returned by "write"
152 should be used to generate hash. Usually, "write" and "read" return
152 should be used to generate hash. Usually, "write" and "read" return
153 different booleans. And "raw" returns a same boolean as "write".
153 different booleans. And "raw" returns a same boolean as "write".
154
154
155 Note: The 'raw' transform is used for changegroup generation and in some
155 Note: The 'raw' transform is used for changegroup generation and in some
156 debug commands. In this case the transform only indicates whether the
156 debug commands. In this case the transform only indicates whether the
157 contents can be used for hash integrity checks.
157 contents can be used for hash integrity checks.
158 """
158 """
159 _insertflagprocessor(flag, processor, _flagprocessors)
159 _insertflagprocessor(flag, processor, _flagprocessors)
160
160
161 def _insertflagprocessor(flag, processor, flagprocessors):
161 def _insertflagprocessor(flag, processor, flagprocessors):
162 if not flag & REVIDX_KNOWN_FLAGS:
162 if not flag & REVIDX_KNOWN_FLAGS:
163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
163 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
164 raise error.ProgrammingError(msg)
164 raise error.ProgrammingError(msg)
165 if flag not in REVIDX_FLAGS_ORDER:
165 if flag not in REVIDX_FLAGS_ORDER:
166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
166 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
167 raise error.ProgrammingError(msg)
167 raise error.ProgrammingError(msg)
168 if flag in flagprocessors:
168 if flag in flagprocessors:
169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
169 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
170 raise error.Abort(msg)
170 raise error.Abort(msg)
171 flagprocessors[flag] = processor
171 flagprocessors[flag] = processor
172
172
173 def getoffset(q):
173 def getoffset(q):
174 return int(q >> 16)
174 return int(q >> 16)
175
175
176 def gettype(q):
176 def gettype(q):
177 return int(q & 0xFFFF)
177 return int(q & 0xFFFF)
178
178
179 def offset_type(offset, type):
179 def offset_type(offset, type):
180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
180 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
181 raise ValueError('unknown revlog index flags')
181 raise ValueError('unknown revlog index flags')
182 return int(int(offset) << 16 | type)
182 return int(int(offset) << 16 | type)
183
183
184 @attr.s(slots=True, frozen=True)
184 @attr.s(slots=True, frozen=True)
185 class _revisioninfo(object):
185 class _revisioninfo(object):
186 """Information about a revision that allows building its fulltext
186 """Information about a revision that allows building its fulltext
187 node: expected hash of the revision
187 node: expected hash of the revision
188 p1, p2: parent revs of the revision
188 p1, p2: parent revs of the revision
189 btext: built text cache consisting of a one-element list
189 btext: built text cache consisting of a one-element list
190 cachedelta: (baserev, uncompressed_delta) or None
190 cachedelta: (baserev, uncompressed_delta) or None
191 flags: flags associated to the revision storage
191 flags: flags associated to the revision storage
192
192
193 One of btext[0] or cachedelta must be set.
193 One of btext[0] or cachedelta must be set.
194 """
194 """
195 node = attr.ib()
195 node = attr.ib()
196 p1 = attr.ib()
196 p1 = attr.ib()
197 p2 = attr.ib()
197 p2 = attr.ib()
198 btext = attr.ib()
198 btext = attr.ib()
199 textlen = attr.ib()
199 textlen = attr.ib()
200 cachedelta = attr.ib()
200 cachedelta = attr.ib()
201 flags = attr.ib()
201 flags = attr.ib()
202
202
203 @interfaceutil.implementer(repository.irevisiondelta)
203 @interfaceutil.implementer(repository.irevisiondelta)
204 @attr.s(slots=True)
204 @attr.s(slots=True)
205 class revlogrevisiondelta(object):
205 class revlogrevisiondelta(object):
206 node = attr.ib()
206 node = attr.ib()
207 p1node = attr.ib()
207 p1node = attr.ib()
208 p2node = attr.ib()
208 p2node = attr.ib()
209 basenode = attr.ib()
209 basenode = attr.ib()
210 flags = attr.ib()
210 flags = attr.ib()
211 baserevisionsize = attr.ib()
211 baserevisionsize = attr.ib()
212 revision = attr.ib()
212 revision = attr.ib()
213 delta = attr.ib()
213 delta = attr.ib()
214 linknode = attr.ib(default=None)
214 linknode = attr.ib(default=None)
215
215
216 @interfaceutil.implementer(repository.iverifyproblem)
216 @interfaceutil.implementer(repository.iverifyproblem)
217 @attr.s(frozen=True)
217 @attr.s(frozen=True)
218 class revlogproblem(object):
218 class revlogproblem(object):
219 warning = attr.ib(default=None)
219 warning = attr.ib(default=None)
220 error = attr.ib(default=None)
220 error = attr.ib(default=None)
221 node = attr.ib(default=None)
221 node = attr.ib(default=None)
222
222
223 # index v0:
223 # index v0:
224 # 4 bytes: offset
224 # 4 bytes: offset
225 # 4 bytes: compressed length
225 # 4 bytes: compressed length
226 # 4 bytes: base rev
226 # 4 bytes: base rev
227 # 4 bytes: link rev
227 # 4 bytes: link rev
228 # 20 bytes: parent 1 nodeid
228 # 20 bytes: parent 1 nodeid
229 # 20 bytes: parent 2 nodeid
229 # 20 bytes: parent 2 nodeid
230 # 20 bytes: nodeid
230 # 20 bytes: nodeid
231 indexformatv0 = struct.Struct(">4l20s20s20s")
231 indexformatv0 = struct.Struct(">4l20s20s20s")
232 indexformatv0_pack = indexformatv0.pack
232 indexformatv0_pack = indexformatv0.pack
233 indexformatv0_unpack = indexformatv0.unpack
233 indexformatv0_unpack = indexformatv0.unpack
234
234
235 class revlogoldindex(list):
235 class revlogoldindex(list):
236 def __getitem__(self, i):
236 def __getitem__(self, i):
237 if i == -1:
237 if i == -1:
238 return (0, 0, 0, -1, -1, -1, -1, nullid)
238 return (0, 0, 0, -1, -1, -1, -1, nullid)
239 return list.__getitem__(self, i)
239 return list.__getitem__(self, i)
240
240
241 class revlogoldio(object):
241 class revlogoldio(object):
242 def __init__(self):
242 def __init__(self):
243 self.size = indexformatv0.size
243 self.size = indexformatv0.size
244
244
245 def parseindex(self, data, inline):
245 def parseindex(self, data, inline):
246 s = self.size
246 s = self.size
247 index = []
247 index = []
248 nodemap = {nullid: nullrev}
248 nodemap = {nullid: nullrev}
249 n = off = 0
249 n = off = 0
250 l = len(data)
250 l = len(data)
251 while off + s <= l:
251 while off + s <= l:
252 cur = data[off:off + s]
252 cur = data[off:off + s]
253 off += s
253 off += s
254 e = indexformatv0_unpack(cur)
254 e = indexformatv0_unpack(cur)
255 # transform to revlogv1 format
255 # transform to revlogv1 format
256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
256 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
257 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
258 index.append(e2)
258 index.append(e2)
259 nodemap[e[6]] = n
259 nodemap[e[6]] = n
260 n += 1
260 n += 1
261
261
262 return revlogoldindex(index), nodemap, None
262 return revlogoldindex(index), nodemap, None
263
263
264 def packentry(self, entry, node, version, rev):
264 def packentry(self, entry, node, version, rev):
265 if gettype(entry[0]):
265 if gettype(entry[0]):
266 raise error.RevlogError(_('index entry flags need revlog '
266 raise error.RevlogError(_('index entry flags need revlog '
267 'version 1'))
267 'version 1'))
268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
268 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
269 node(entry[5]), node(entry[6]), entry[7])
269 node(entry[5]), node(entry[6]), entry[7])
270 return indexformatv0_pack(*e2)
270 return indexformatv0_pack(*e2)
271
271
272 # index ng:
272 # index ng:
273 # 6 bytes: offset
273 # 6 bytes: offset
274 # 2 bytes: flags
274 # 2 bytes: flags
275 # 4 bytes: compressed length
275 # 4 bytes: compressed length
276 # 4 bytes: uncompressed length
276 # 4 bytes: uncompressed length
277 # 4 bytes: base rev
277 # 4 bytes: base rev
278 # 4 bytes: link rev
278 # 4 bytes: link rev
279 # 4 bytes: parent 1 rev
279 # 4 bytes: parent 1 rev
280 # 4 bytes: parent 2 rev
280 # 4 bytes: parent 2 rev
281 # 32 bytes: nodeid
281 # 32 bytes: nodeid
282 indexformatng = struct.Struct(">Qiiiiii20s12x")
282 indexformatng = struct.Struct(">Qiiiiii20s12x")
283 indexformatng_pack = indexformatng.pack
283 indexformatng_pack = indexformatng.pack
284 versionformat = struct.Struct(">I")
284 versionformat = struct.Struct(">I")
285 versionformat_pack = versionformat.pack
285 versionformat_pack = versionformat.pack
286 versionformat_unpack = versionformat.unpack
286 versionformat_unpack = versionformat.unpack
287
287
288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
288 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
289 # signed integer)
289 # signed integer)
290 _maxentrysize = 0x7fffffff
290 _maxentrysize = 0x7fffffff
291
291
292 class revlogio(object):
292 class revlogio(object):
293 def __init__(self):
293 def __init__(self):
294 self.size = indexformatng.size
294 self.size = indexformatng.size
295
295
296 def parseindex(self, data, inline):
296 def parseindex(self, data, inline):
297 # call the C implementation to parse the index data
297 # call the C implementation to parse the index data
298 index, cache = parsers.parse_index2(data, inline)
298 index, cache = parsers.parse_index2(data, inline)
299 return index, getattr(index, 'nodemap', None), cache
299 return index, getattr(index, 'nodemap', None), cache
300
300
301 def packentry(self, entry, node, version, rev):
301 def packentry(self, entry, node, version, rev):
302 p = indexformatng_pack(*entry)
302 p = indexformatng_pack(*entry)
303 if rev == 0:
303 if rev == 0:
304 p = versionformat_pack(version) + p[4:]
304 p = versionformat_pack(version) + p[4:]
305 return p
305 return p
306
306
307 class revlog(object):
307 class revlog(object):
308 """
308 """
309 the underlying revision storage object
309 the underlying revision storage object
310
310
311 A revlog consists of two parts, an index and the revision data.
311 A revlog consists of two parts, an index and the revision data.
312
312
313 The index is a file with a fixed record size containing
313 The index is a file with a fixed record size containing
314 information on each revision, including its nodeid (hash), the
314 information on each revision, including its nodeid (hash), the
315 nodeids of its parents, the position and offset of its data within
315 nodeids of its parents, the position and offset of its data within
316 the data file, and the revision it's based on. Finally, each entry
316 the data file, and the revision it's based on. Finally, each entry
317 contains a linkrev entry that can serve as a pointer to external
317 contains a linkrev entry that can serve as a pointer to external
318 data.
318 data.
319
319
320 The revision data itself is a linear collection of data chunks.
320 The revision data itself is a linear collection of data chunks.
321 Each chunk represents a revision and is usually represented as a
321 Each chunk represents a revision and is usually represented as a
322 delta against the previous chunk. To bound lookup time, runs of
322 delta against the previous chunk. To bound lookup time, runs of
323 deltas are limited to about 2 times the length of the original
323 deltas are limited to about 2 times the length of the original
324 version data. This makes retrieval of a version proportional to
324 version data. This makes retrieval of a version proportional to
325 its size, or O(1) relative to the number of revisions.
325 its size, or O(1) relative to the number of revisions.
326
326
327 Both pieces of the revlog are written to in an append-only
327 Both pieces of the revlog are written to in an append-only
328 fashion, which means we never need to rewrite a file to insert or
328 fashion, which means we never need to rewrite a file to insert or
329 remove data, and can use some simple techniques to avoid the need
329 remove data, and can use some simple techniques to avoid the need
330 for locking while reading.
330 for locking while reading.
331
331
332 If checkambig, indexfile is opened with checkambig=True at
332 If checkambig, indexfile is opened with checkambig=True at
333 writing, to avoid file stat ambiguity.
333 writing, to avoid file stat ambiguity.
334
334
335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
335 If mmaplargeindex is True, and an mmapindexthreshold is set, the
336 index will be mmapped rather than read if it is larger than the
336 index will be mmapped rather than read if it is larger than the
337 configured threshold.
337 configured threshold.
338
338
339 If censorable is True, the revlog can have censored revisions.
339 If censorable is True, the revlog can have censored revisions.
340 """
340 """
341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
341 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
342 mmaplargeindex=False, censorable=False):
342 mmaplargeindex=False, censorable=False):
343 """
343 """
344 create a revlog object
344 create a revlog object
345
345
346 opener is a function that abstracts the file opening operation
346 opener is a function that abstracts the file opening operation
347 and can be used to implement COW semantics or the like.
347 and can be used to implement COW semantics or the like.
348 """
348 """
349 self.indexfile = indexfile
349 self.indexfile = indexfile
350 self.datafile = datafile or (indexfile[:-2] + ".d")
350 self.datafile = datafile or (indexfile[:-2] + ".d")
351 self.opener = opener
351 self.opener = opener
352 # When True, indexfile is opened with checkambig=True at writing, to
352 # When True, indexfile is opened with checkambig=True at writing, to
353 # avoid file stat ambiguity.
353 # avoid file stat ambiguity.
354 self._checkambig = checkambig
354 self._checkambig = checkambig
355 self._mmaplargeindex = mmaplargeindex
355 self._mmaplargeindex = mmaplargeindex
356 self._censorable = censorable
356 self._censorable = censorable
357 # 3-tuple of (node, rev, text) for a raw revision.
357 # 3-tuple of (node, rev, text) for a raw revision.
358 self._revisioncache = None
358 self._revisioncache = None
359 # Maps rev to chain base rev.
359 # Maps rev to chain base rev.
360 self._chainbasecache = util.lrucachedict(100)
360 self._chainbasecache = util.lrucachedict(100)
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 self._chunkcache = (0, '')
362 self._chunkcache = (0, '')
363 # How much data to read and cache into the raw revlog data cache.
363 # How much data to read and cache into the raw revlog data cache.
364 self._chunkcachesize = 65536
364 self._chunkcachesize = 65536
365 self._maxchainlen = None
365 self._maxchainlen = None
366 self._deltabothparents = True
366 self._deltabothparents = True
367 self.index = []
367 self.index = []
368 # Mapping of partial identifiers to full nodes.
368 # Mapping of partial identifiers to full nodes.
369 self._pcache = {}
369 self._pcache = {}
370 # Mapping of revision integer to full node.
370 # Mapping of revision integer to full node.
371 self._nodecache = {nullid: nullrev}
371 self._nodecache = {nullid: nullrev}
372 self._nodepos = None
372 self._nodepos = None
373 self._compengine = 'zlib'
373 self._compengine = 'zlib'
374 self._maxdeltachainspan = -1
374 self._maxdeltachainspan = -1
375 self._withsparseread = False
375 self._withsparseread = False
376 self._sparserevlog = False
376 self._sparserevlog = False
377 self._srdensitythreshold = 0.50
377 self._srdensitythreshold = 0.50
378 self._srmingapsize = 262144
378 self._srmingapsize = 262144
379
379
380 # Make copy of flag processors so each revlog instance can support
380 # Make copy of flag processors so each revlog instance can support
381 # custom flags.
381 # custom flags.
382 self._flagprocessors = dict(_flagprocessors)
382 self._flagprocessors = dict(_flagprocessors)
383
383
384 # 2-tuple of file handles being used for active writing.
384 # 2-tuple of file handles being used for active writing.
385 self._writinghandles = None
385 self._writinghandles = None
386
386
387 self._loadindex()
387 self._loadindex()
388
388
389 def _loadindex(self):
389 def _loadindex(self):
390 mmapindexthreshold = None
390 mmapindexthreshold = None
391 opts = getattr(self.opener, 'options', {}) or {}
391 opts = getattr(self.opener, 'options', {}) or {}
392
392
393 if 'revlogv2' in opts:
393 if 'revlogv2' in opts:
394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
394 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
395 elif 'revlogv1' in opts:
395 elif 'revlogv1' in opts:
396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
396 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
397 if 'generaldelta' in opts:
397 if 'generaldelta' in opts:
398 newversionflags |= FLAG_GENERALDELTA
398 newversionflags |= FLAG_GENERALDELTA
399 else:
399 else:
400 newversionflags = REVLOG_DEFAULT_VERSION
400 newversionflags = REVLOG_DEFAULT_VERSION
401
401
402 if 'chunkcachesize' in opts:
402 if 'chunkcachesize' in opts:
403 self._chunkcachesize = opts['chunkcachesize']
403 self._chunkcachesize = opts['chunkcachesize']
404 if 'maxchainlen' in opts:
404 if 'maxchainlen' in opts:
405 self._maxchainlen = opts['maxchainlen']
405 self._maxchainlen = opts['maxchainlen']
406 if 'deltabothparents' in opts:
406 if 'deltabothparents' in opts:
407 self._deltabothparents = opts['deltabothparents']
407 self._deltabothparents = opts['deltabothparents']
408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
408 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
409 if 'compengine' in opts:
409 if 'compengine' in opts:
410 self._compengine = opts['compengine']
410 self._compengine = opts['compengine']
411 if 'maxdeltachainspan' in opts:
411 if 'maxdeltachainspan' in opts:
412 self._maxdeltachainspan = opts['maxdeltachainspan']
412 self._maxdeltachainspan = opts['maxdeltachainspan']
413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
413 if self._mmaplargeindex and 'mmapindexthreshold' in opts:
414 mmapindexthreshold = opts['mmapindexthreshold']
414 mmapindexthreshold = opts['mmapindexthreshold']
415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
415 self._sparserevlog = bool(opts.get('sparse-revlog', False))
416 withsparseread = bool(opts.get('with-sparse-read', False))
416 withsparseread = bool(opts.get('with-sparse-read', False))
417 # sparse-revlog forces sparse-read
417 # sparse-revlog forces sparse-read
418 self._withsparseread = self._sparserevlog or withsparseread
418 self._withsparseread = self._sparserevlog or withsparseread
419 if 'sparse-read-density-threshold' in opts:
419 if 'sparse-read-density-threshold' in opts:
420 self._srdensitythreshold = opts['sparse-read-density-threshold']
420 self._srdensitythreshold = opts['sparse-read-density-threshold']
421 if 'sparse-read-min-gap-size' in opts:
421 if 'sparse-read-min-gap-size' in opts:
422 self._srmingapsize = opts['sparse-read-min-gap-size']
422 self._srmingapsize = opts['sparse-read-min-gap-size']
423 if opts.get('enableellipsis'):
423 if opts.get('enableellipsis'):
424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
424 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
425
425
426 # revlog v0 doesn't have flag processors
426 # revlog v0 doesn't have flag processors
427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
427 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
428 _insertflagprocessor(flag, processor, self._flagprocessors)
428 _insertflagprocessor(flag, processor, self._flagprocessors)
429
429
430 if self._chunkcachesize <= 0:
430 if self._chunkcachesize <= 0:
431 raise error.RevlogError(_('revlog chunk cache size %r is not '
431 raise error.RevlogError(_('revlog chunk cache size %r is not '
432 'greater than 0') % self._chunkcachesize)
432 'greater than 0') % self._chunkcachesize)
433 elif self._chunkcachesize & (self._chunkcachesize - 1):
433 elif self._chunkcachesize & (self._chunkcachesize - 1):
434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
434 raise error.RevlogError(_('revlog chunk cache size %r is not a '
435 'power of 2') % self._chunkcachesize)
435 'power of 2') % self._chunkcachesize)
436
436
437 indexdata = ''
437 indexdata = ''
438 self._initempty = True
438 self._initempty = True
439 try:
439 try:
440 with self._indexfp() as f:
440 with self._indexfp() as f:
441 if (mmapindexthreshold is not None and
441 if (mmapindexthreshold is not None and
442 self.opener.fstat(f).st_size >= mmapindexthreshold):
442 self.opener.fstat(f).st_size >= mmapindexthreshold):
443 indexdata = util.buffer(util.mmapread(f))
443 indexdata = util.buffer(util.mmapread(f))
444 else:
444 else:
445 indexdata = f.read()
445 indexdata = f.read()
446 if len(indexdata) > 0:
446 if len(indexdata) > 0:
447 versionflags = versionformat_unpack(indexdata[:4])[0]
447 versionflags = versionformat_unpack(indexdata[:4])[0]
448 self._initempty = False
448 self._initempty = False
449 else:
449 else:
450 versionflags = newversionflags
450 versionflags = newversionflags
451 except IOError as inst:
451 except IOError as inst:
452 if inst.errno != errno.ENOENT:
452 if inst.errno != errno.ENOENT:
453 raise
453 raise
454
454
455 versionflags = newversionflags
455 versionflags = newversionflags
456
456
457 self.version = versionflags
457 self.version = versionflags
458
458
459 flags = versionflags & ~0xFFFF
459 flags = versionflags & ~0xFFFF
460 fmt = versionflags & 0xFFFF
460 fmt = versionflags & 0xFFFF
461
461
462 if fmt == REVLOGV0:
462 if fmt == REVLOGV0:
463 if flags:
463 if flags:
464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
464 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
465 'revlog %s') %
465 'revlog %s') %
466 (flags >> 16, fmt, self.indexfile))
466 (flags >> 16, fmt, self.indexfile))
467
467
468 self._inline = False
468 self._inline = False
469 self._generaldelta = False
469 self._generaldelta = False
470
470
471 elif fmt == REVLOGV1:
471 elif fmt == REVLOGV1:
472 if flags & ~REVLOGV1_FLAGS:
472 if flags & ~REVLOGV1_FLAGS:
473 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
473 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
474 'revlog %s') %
474 'revlog %s') %
475 (flags >> 16, fmt, self.indexfile))
475 (flags >> 16, fmt, self.indexfile))
476
476
477 self._inline = versionflags & FLAG_INLINE_DATA
477 self._inline = versionflags & FLAG_INLINE_DATA
478 self._generaldelta = versionflags & FLAG_GENERALDELTA
478 self._generaldelta = versionflags & FLAG_GENERALDELTA
479
479
480 elif fmt == REVLOGV2:
480 elif fmt == REVLOGV2:
481 if flags & ~REVLOGV2_FLAGS:
481 if flags & ~REVLOGV2_FLAGS:
482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
482 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
483 'revlog %s') %
483 'revlog %s') %
484 (flags >> 16, fmt, self.indexfile))
484 (flags >> 16, fmt, self.indexfile))
485
485
486 self._inline = versionflags & FLAG_INLINE_DATA
486 self._inline = versionflags & FLAG_INLINE_DATA
487 # generaldelta implied by version 2 revlogs.
487 # generaldelta implied by version 2 revlogs.
488 self._generaldelta = True
488 self._generaldelta = True
489
489
490 else:
490 else:
491 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
491 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
492 (fmt, self.indexfile))
492 (fmt, self.indexfile))
493
493
494 self._storedeltachains = True
494 self._storedeltachains = True
495
495
496 self._io = revlogio()
496 self._io = revlogio()
497 if self.version == REVLOGV0:
497 if self.version == REVLOGV0:
498 self._io = revlogoldio()
498 self._io = revlogoldio()
499 try:
499 try:
500 d = self._io.parseindex(indexdata, self._inline)
500 d = self._io.parseindex(indexdata, self._inline)
501 except (ValueError, IndexError):
501 except (ValueError, IndexError):
502 raise error.RevlogError(_("index %s is corrupted") %
502 raise error.RevlogError(_("index %s is corrupted") %
503 self.indexfile)
503 self.indexfile)
504 self.index, nodemap, self._chunkcache = d
504 self.index, nodemap, self._chunkcache = d
505 if nodemap is not None:
505 if nodemap is not None:
506 self.nodemap = self._nodecache = nodemap
506 self.nodemap = self._nodecache = nodemap
507 if not self._chunkcache:
507 if not self._chunkcache:
508 self._chunkclear()
508 self._chunkclear()
509 # revnum -> (chain-length, sum-delta-length)
509 # revnum -> (chain-length, sum-delta-length)
510 self._chaininfocache = {}
510 self._chaininfocache = {}
511 # revlog header -> revlog compressor
511 # revlog header -> revlog compressor
512 self._decompressors = {}
512 self._decompressors = {}
513
513
@util.propertycache
def _compressor(self):
    """Compression engine object used for new revlog payloads.

    Built lazily from the configured engine name (``self._compengine``).
    """
    return util.compengines[self._compengine].revlogcompressor()
517
517
def _indexfp(self, mode='r'):
    """Return a file object for the revlog's index file.

    Any write mode enables ambiguity checking; plain 'w' additionally
    requests an atomic temporary file.
    """
    opts = {r'mode': mode}
    if mode != 'r':
        opts[r'checkambig'] = self._checkambig
    if mode == 'w':
        opts[r'atomictemp'] = True
    return self.opener(self.indexfile, **opts)
526
526
def _datafp(self, mode='r'):
    """Open and return the revlog's data file in the given mode."""
    fp = self.opener(self.datafile, mode=mode)
    return fp
530
530
@contextlib.contextmanager
def _datareadfp(self, existingfp=None):
    """file object suitable to read data"""
    # Use explicit file handle, if given.
    if existingfp is not None:
        yield existingfp

    # Use a file handle being actively used for writes, if available.
    # There is some danger to doing this because reads will seek the
    # file. However, _writeentry() performs a SEEK_END before all writes,
    # so we should be safe.
    elif self._writinghandles:
        if self._inline:
            # inline revlogs keep data in the index file
            yield self._writinghandles[0]
        else:
            yield self._writinghandles[1]

    # Otherwise open a new file handle.
    else:
        if self._inline:
            func = self._indexfp
        else:
            func = self._datafp
        with func() as fp:
            yield fp
556
556
def tip(self):
    """Return the node id of the revlog's last revision."""
    last = len(self.index) - 1
    return self.node(last)
def __contains__(self, rev):
    """Report whether ``rev`` is a valid revision number."""
    return rev >= 0 and rev < len(self)
def __len__(self):
    """Return the number of revisions stored in the revlog."""
    index = self.index
    return len(index)
def __iter__(self):
    # Iterate revision numbers in ascending order.
    return iter(pycompat.xrange(len(self)))
def revs(self, start=0, stop=None):
    """iterate over all rev in this revlog (from start to stop)"""
    return storageutil.iterrevs(len(self), start=start, stop=stop)
568
568
@util.propertycache
def nodemap(self):
    """Mapping of node id -> revision number, built lazily.

    Forcing a lookup of the first node fills ``self._nodecache`` all the
    way down, so the returned cache covers every revision.
    """
    if self.index:
        # populate mapping down to the initial node
        node0 = self.index[0][7] # get around changelog filtering
        self.rev(node0)
    return self._nodecache
576
576
def hasnode(self, node):
    """Return True when ``node`` is known to this revlog."""
    try:
        self.rev(node)
    except KeyError:
        return False
    return True
583
583
def candelta(self, baserev, rev):
    """whether two revisions (baserev, rev) can be delta-ed or not"""
    # Disable delta if either rev requires a content-changing flag
    # processor (ex. LFS). This is because such flag processor can alter
    # the rawtext content that the delta will be based on, and two clients
    # could have a same revlog node with different flags (i.e. different
    # rawtext contents) and the delta could be incompatible.
    for r in (baserev, rev):
        if self.flags(r) & REVIDX_RAWTEXT_CHANGING_FLAGS:
            return False
    return True
595
595
def clearcaches(self):
    """Drop all in-memory caches so they are rebuilt on demand."""
    self._revisioncache = None
    self._chainbasecache.clear()
    self._chunkcache = (0, '')
    self._pcache = {}

    try:
        # The C index object exposes an explicit cache-clearing API.
        self._nodecache.clearcaches()
    except AttributeError:
        # Pure Python fallback: reset to a fresh node -> rev mapping and
        # restart the backwards scan position.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
607
607
def rev(self, node):
    """Return the revision number bound to *node*.

    Raises WdirUnsupported for the working-directory pseudo-node and
    LookupError when the node is unknown to this revlog.
    """
    try:
        return self._nodecache[node]
    except TypeError:
        raise
    except error.RevlogError:
        # parsers.c radix tree lookup failed
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
    except KeyError:
        # pure python cache lookup failed
        n = self._nodecache
        i = self.index
        p = self._nodepos
        if p is None:
            p = len(i) - 1
        else:
            assert p < len(i)
        # Scan backwards from the last unscanned position, filling the
        # cache as we go; remember where to resume on the next miss.
        for r in pycompat.xrange(p, -1, -1):
            v = i[r][7]
            n[v] = r
            if v == node:
                self._nodepos = r - 1
                return r
        if node == wdirid or node in wdirfilenodeids:
            raise error.WdirUnsupported
        raise error.LookupError(node, self.indexfile, _('no node'))
636
636
# Accessors for index entries.

# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
# are flags.
def start(self, rev):
    """Byte offset in the data file where ``rev``'s chunk begins."""
    # The first index field packs the offset (high bits) with the
    # 16 low-order flag bits.
    offset_flags = self.index[rev][0]
    return int(offset_flags >> 16)
643
643
def flags(self, rev):
    """Return the 16 low-order flag bits stored for ``rev``."""
    entry = self.index[rev]
    return entry[0] & 0xFFFF
646
646
def length(self, rev):
    """Stored (compressed) length of ``rev``'s chunk."""
    entry = self.index[rev]
    return entry[1]
649
649
def rawsize(self, rev):
    """return the length of the uncompressed text for a given revision"""
    stored = self.index[rev][2]
    if stored >= 0:
        return stored

    # A negative stored size means it is unknown; fall back to
    # materializing the raw text and measuring it.
    raw = self.revision(rev, raw=True)
    return len(raw)
658
658
def size(self, rev):
    """length of non-raw text (processed by a "read" flag processor)"""
    # fast path: if no "read" flag processor could change the content,
    # size is rawsize. note: ELLIPSIS is known to not change the content.
    relevant = REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
    if self.flags(rev) & relevant:
        return len(self.revision(rev, raw=False))
    return self.rawsize(rev)
668
668
def chainbase(self, rev):
    """Return the first revision of the delta chain containing ``rev``."""
    cached = self._chainbasecache.get(rev)
    if cached is not None:
        return cached

    index = self.index
    current = rev
    base = index[current][3]
    # Walk toward the base; an entry whose base field points at itself
    # marks the start of the chain (a full snapshot).
    while base != current:
        current = base
        base = index[current][3]

    self._chainbasecache[rev] = base
    return base
683
683
def linkrev(self, rev):
    """Revision number of the linked (changelog) revision for ``rev``."""
    entry = self.index[rev]
    return entry[4]
686
686
def parentrevs(self, rev):
    """Return the two parent revision numbers of ``rev``.

    The working-directory pseudo-revision raises WdirUnsupported; any
    other out-of-range revision re-raises IndexError.
    """
    try:
        e = self.index[rev]
    except IndexError:
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise

    p1, p2 = e[5], e[6]
    return p1, p2

# fast parentrevs(rev) where rev isn't filtered
_uncheckedparentrevs = parentrevs
699
699
def node(self, rev):
    """Return the node id stored for revision ``rev``."""
    index = self.index
    try:
        entry = index[rev]
    except IndexError:
        # Report the working-directory pseudo-revision specially.
        if rev == wdirrev:
            raise error.WdirUnsupported
        raise
    return entry[7]
707
707
# Derived from index values.
def end(self, rev):
    """Byte offset one past the end of ``rev``'s chunk."""
    begin = self.start(rev)
    return begin + self.length(rev)
712
712
def parents(self, node):
    """Return the parent node ids of ``node``."""
    index = self.index
    entry = index[self.rev(node)]
    # Map the stored parent revision numbers back to node ids inline.
    p1 = index[entry[5]][7]
    p2 = index[entry[6]][7]
    return p1, p2
717
717
def chainlen(self, rev):
    """Length of the delta chain ending at ``rev``."""
    info = self._chaininfo(rev)
    return info[0]
720
720
def _chaininfo(self, rev):
    """Return ``(chain length, total compressed delta size)`` for ``rev``.

    Results are memoized in ``self._chaininfocache``; a cache hit on any
    intermediate revision short-circuits the walk.
    """
    chaininfocache = self._chaininfocache
    if rev in chaininfocache:
        return chaininfocache[rev]
    index = self.index
    generaldelta = self._generaldelta
    iterrev = rev
    e = index[iterrev]
    clen = 0
    compresseddeltalen = 0
    # Walk toward the chain base: with generaldelta the delta parent is
    # recorded in the entry, otherwise it is the previous revision.
    while iterrev != e[3]:
        clen += 1
        compresseddeltalen += e[1]
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        if iterrev in chaininfocache:
            t = chaininfocache[iterrev]
            clen += t[0]
            compresseddeltalen += t[1]
            break
        e = index[iterrev]
    else:
        # Add text length of base since decompressing that also takes
        # work. For cache hits the length is already included.
        compresseddeltalen += e[1]
    r = (clen, compresseddeltalen)
    chaininfocache[rev] = r
    return r
751
751
def _deltachain(self, rev, stoprev=None):
    """Obtain the delta chain for a revision.

    ``stoprev`` specifies a revision to stop at. If not specified, we
    stop at the base of the chain.

    Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
    revs in ascending order and ``stopped`` is a bool indicating whether
    ``stoprev`` was hit.
    """
    # Try C implementation.
    try:
        return self.index.deltachain(rev, stoprev, self._generaldelta)
    except AttributeError:
        pass

    chain = []

    # Alias to prevent attribute lookup in tight loop.
    index = self.index
    generaldelta = self._generaldelta

    iterrev = rev
    e = index[iterrev]
    # Walk toward the chain base: with generaldelta the delta parent is
    # stored in the entry; otherwise it is the previous revision.
    while iterrev != e[3] and iterrev != stoprev:
        chain.append(iterrev)
        if generaldelta:
            iterrev = e[3]
        else:
            iterrev -= 1
        e = index[iterrev]

    if iterrev == stoprev:
        stopped = True
    else:
        chain.append(iterrev)
        stopped = False

    chain.reverse()
    return chain, stopped
792
792
def ancestors(self, revs, stoprev=0, inclusive=False):
    """Generate the ancestors of 'revs' in reverse revision order.
    Does not generate revs lower than stoprev.

    See the documentation for ancestor.lazyancestors for more details."""

    # first, make sure start revisions aren't filtered
    revs = list(revs)
    checkrev = self.node
    for r in revs:
        checkrev(r)
    # and we're sure ancestors aren't filtered as well

    # Prefer native implementations when available: the Rust extension
    # first, then the C-backed variant, then pure Python.
    if rustext is not None:
        lazyancestors = rustext.ancestor.LazyAncestors
        arg = self.index
    elif util.safehasattr(parsers, 'rustlazyancestors'):
        lazyancestors = ancestor.rustlazyancestors
        arg = self.index
    else:
        lazyancestors = ancestor.lazyancestors
        arg = self._uncheckedparentrevs
    return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
816
816
def descendants(self, revs):
    """Generate revision numbers of all descendants of ``revs``."""
    return dagop.descendantrevs(revs, self.revs, self.parentrevs)
819
819
def findcommonmissing(self, common=None, heads=None):
    """Return a tuple of the ancestors of common and the ancestors of heads
    that are not ancestors of common. In revset terminology, we return the
    tuple:

    ::common, (::heads) - (::common)

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    common = [self.rev(n) for n in common]
    heads = [self.rev(n) for n in heads]

    # we want the ancestors, but inclusive
    class lazyset(object):
        # Set-like view over a lazily-evaluated iterable plus explicit
        # additions; membership tests may consume the lazy side.
        def __init__(self, lazyvalues):
            self.addedvalues = set()
            self.lazyvalues = lazyvalues

        def __contains__(self, value):
            return value in self.addedvalues or value in self.lazyvalues

        def __iter__(self):
            added = self.addedvalues
            for r in added:
                yield r
            for r in self.lazyvalues:
                if not r in added:
                    yield r

        def add(self, value):
            self.addedvalues.add(value)

        def update(self, values):
            self.addedvalues.update(values)

    has = lazyset(self.ancestors(common))
    has.add(nullrev)
    has.update(common)

    # take all ancestors from heads that aren't in has
    missing = set()
    visit = collections.deque(r for r in heads if r not in has)
    while visit:
        r = visit.popleft()
        if r in missing:
            continue
        else:
            missing.add(r)
            for p in self.parentrevs(r):
                if p not in has:
                    visit.append(p)
    missing = list(missing)
    missing.sort()
    return has, [self.node(miss) for miss in missing]
883
883
def incrementalmissingrevs(self, common=None):
    """Return an object that can be used to incrementally compute the
    revision numbers of the ancestors of arbitrary sets that are not
    ancestors of common. This is an ancestor.incrementalmissingancestors
    object.

    'common' is a list of revision numbers. If common is not supplied, uses
    nullrev.
    """
    if common is None:
        common = [nullrev]

    # Prefer the Rust implementation when the extension is available.
    if rustext is not None:
        return rustext.ancestor.MissingAncestors(self.index, common)
    return ancestor.incrementalmissingancestors(self.parentrevs, common)
899
899
def findmissingrevs(self, common=None, heads=None):
    """Return the revision numbers of the ancestors of heads that
    are not ancestors of common.

    More specifically, return a list of revision numbers corresponding to
    nodes N such that every N satisfies the following constraints:

    1. N is an ancestor of some node in 'heads'
    2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of revision numbers. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if heads is None:
        heads = self.headrevs()
    if common is None:
        common = [nullrev]

    missing = self.incrementalmissingrevs(common=common)
    return missing.missingancestors(heads)
923
923
def findmissing(self, common=None, heads=None):
    """Return the ancestors of heads that are not ancestors of common.

    More specifically, return a list of nodes N such that every N
    satisfies the following constraints:

    1. N is an ancestor of some node in 'heads'
    2. N is not an ancestor of any node in 'common'

    The list is sorted by revision number, meaning it is
    topologically sorted.

    'heads' and 'common' are both lists of node IDs. If heads is
    not supplied, uses all of the revlog's heads. If common is not
    supplied, uses nullid."""
    if common is None:
        common = [nullid]
    if heads is None:
        heads = self.heads()

    # Translate node ids to revision numbers before computing.
    commonrevs = [self.rev(n) for n in common]
    headrevs = [self.rev(n) for n in heads]

    missing = self.incrementalmissingrevs(common=commonrevs)
    return [self.node(r) for r in missing.missingancestors(headrevs)]
949
949
950 def nodesbetween(self, roots=None, heads=None):
950 def nodesbetween(self, roots=None, heads=None):
951 """Return a topological path from 'roots' to 'heads'.
951 """Return a topological path from 'roots' to 'heads'.
952
952
953 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
953 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
954 topologically sorted list of all nodes N that satisfy both of
954 topologically sorted list of all nodes N that satisfy both of
955 these constraints:
955 these constraints:
956
956
957 1. N is a descendant of some node in 'roots'
957 1. N is a descendant of some node in 'roots'
958 2. N is an ancestor of some node in 'heads'
958 2. N is an ancestor of some node in 'heads'
959
959
960 Every node is considered to be both a descendant and an ancestor
960 Every node is considered to be both a descendant and an ancestor
961 of itself, so every reachable node in 'roots' and 'heads' will be
961 of itself, so every reachable node in 'roots' and 'heads' will be
962 included in 'nodes'.
962 included in 'nodes'.
963
963
964 'outroots' is the list of reachable nodes in 'roots', i.e., the
964 'outroots' is the list of reachable nodes in 'roots', i.e., the
965 subset of 'roots' that is returned in 'nodes'. Likewise,
965 subset of 'roots' that is returned in 'nodes'. Likewise,
966 'outheads' is the subset of 'heads' that is also in 'nodes'.
966 'outheads' is the subset of 'heads' that is also in 'nodes'.
967
967
968 'roots' and 'heads' are both lists of node IDs. If 'roots' is
968 'roots' and 'heads' are both lists of node IDs. If 'roots' is
969 unspecified, uses nullid as the only root. If 'heads' is
969 unspecified, uses nullid as the only root. If 'heads' is
970 unspecified, uses list of all of the revlog's heads."""
970 unspecified, uses list of all of the revlog's heads."""
971 nonodes = ([], [], [])
971 nonodes = ([], [], [])
972 if roots is not None:
972 if roots is not None:
973 roots = list(roots)
973 roots = list(roots)
974 if not roots:
974 if not roots:
975 return nonodes
975 return nonodes
976 lowestrev = min([self.rev(n) for n in roots])
976 lowestrev = min([self.rev(n) for n in roots])
977 else:
977 else:
978 roots = [nullid] # Everybody's a descendant of nullid
978 roots = [nullid] # Everybody's a descendant of nullid
979 lowestrev = nullrev
979 lowestrev = nullrev
980 if (lowestrev == nullrev) and (heads is None):
980 if (lowestrev == nullrev) and (heads is None):
981 # We want _all_ the nodes!
981 # We want _all_ the nodes!
982 return ([self.node(r) for r in self], [nullid], list(self.heads()))
982 return ([self.node(r) for r in self], [nullid], list(self.heads()))
983 if heads is None:
983 if heads is None:
984 # All nodes are ancestors, so the latest ancestor is the last
984 # All nodes are ancestors, so the latest ancestor is the last
985 # node.
985 # node.
986 highestrev = len(self) - 1
986 highestrev = len(self) - 1
987 # Set ancestors to None to signal that every node is an ancestor.
987 # Set ancestors to None to signal that every node is an ancestor.
988 ancestors = None
988 ancestors = None
989 # Set heads to an empty dictionary for later discovery of heads
989 # Set heads to an empty dictionary for later discovery of heads
990 heads = {}
990 heads = {}
991 else:
991 else:
992 heads = list(heads)
992 heads = list(heads)
993 if not heads:
993 if not heads:
994 return nonodes
994 return nonodes
995 ancestors = set()
995 ancestors = set()
996 # Turn heads into a dictionary so we can remove 'fake' heads.
996 # Turn heads into a dictionary so we can remove 'fake' heads.
997 # Also, later we will be using it to filter out the heads we can't
997 # Also, later we will be using it to filter out the heads we can't
998 # find from roots.
998 # find from roots.
999 heads = dict.fromkeys(heads, False)
999 heads = dict.fromkeys(heads, False)
1000 # Start at the top and keep marking parents until we're done.
1000 # Start at the top and keep marking parents until we're done.
1001 nodestotag = set(heads)
1001 nodestotag = set(heads)
1002 # Remember where the top was so we can use it as a limit later.
1002 # Remember where the top was so we can use it as a limit later.
1003 highestrev = max([self.rev(n) for n in nodestotag])
1003 highestrev = max([self.rev(n) for n in nodestotag])
1004 while nodestotag:
1004 while nodestotag:
1005 # grab a node to tag
1005 # grab a node to tag
1006 n = nodestotag.pop()
1006 n = nodestotag.pop()
1007 # Never tag nullid
1007 # Never tag nullid
1008 if n == nullid:
1008 if n == nullid:
1009 continue
1009 continue
1010 # A node's revision number represents its place in a
1010 # A node's revision number represents its place in a
1011 # topologically sorted list of nodes.
1011 # topologically sorted list of nodes.
1012 r = self.rev(n)
1012 r = self.rev(n)
1013 if r >= lowestrev:
1013 if r >= lowestrev:
1014 if n not in ancestors:
1014 if n not in ancestors:
1015 # If we are possibly a descendant of one of the roots
1015 # If we are possibly a descendant of one of the roots
1016 # and we haven't already been marked as an ancestor
1016 # and we haven't already been marked as an ancestor
1017 ancestors.add(n) # Mark as ancestor
1017 ancestors.add(n) # Mark as ancestor
1018 # Add non-nullid parents to list of nodes to tag.
1018 # Add non-nullid parents to list of nodes to tag.
1019 nodestotag.update([p for p in self.parents(n) if
1019 nodestotag.update([p for p in self.parents(n) if
1020 p != nullid])
1020 p != nullid])
1021 elif n in heads: # We've seen it before, is it a fake head?
1021 elif n in heads: # We've seen it before, is it a fake head?
1022 # So it is, real heads should not be the ancestors of
1022 # So it is, real heads should not be the ancestors of
1023 # any other heads.
1023 # any other heads.
1024 heads.pop(n)
1024 heads.pop(n)
1025 if not ancestors:
1025 if not ancestors:
1026 return nonodes
1026 return nonodes
1027 # Now that we have our set of ancestors, we want to remove any
1027 # Now that we have our set of ancestors, we want to remove any
1028 # roots that are not ancestors.
1028 # roots that are not ancestors.
1029
1029
1030 # If one of the roots was nullid, everything is included anyway.
1030 # If one of the roots was nullid, everything is included anyway.
1031 if lowestrev > nullrev:
1031 if lowestrev > nullrev:
1032 # But, since we weren't, let's recompute the lowest rev to not
1032 # But, since we weren't, let's recompute the lowest rev to not
1033 # include roots that aren't ancestors.
1033 # include roots that aren't ancestors.
1034
1034
1035 # Filter out roots that aren't ancestors of heads
1035 # Filter out roots that aren't ancestors of heads
1036 roots = [root for root in roots if root in ancestors]
1036 roots = [root for root in roots if root in ancestors]
1037 # Recompute the lowest revision
1037 # Recompute the lowest revision
1038 if roots:
1038 if roots:
1039 lowestrev = min([self.rev(root) for root in roots])
1039 lowestrev = min([self.rev(root) for root in roots])
1040 else:
1040 else:
1041 # No more roots? Return empty list
1041 # No more roots? Return empty list
1042 return nonodes
1042 return nonodes
1043 else:
1043 else:
1044 # We are descending from nullid, and don't need to care about
1044 # We are descending from nullid, and don't need to care about
1045 # any other roots.
1045 # any other roots.
1046 lowestrev = nullrev
1046 lowestrev = nullrev
1047 roots = [nullid]
1047 roots = [nullid]
1048 # Transform our roots list into a set.
1048 # Transform our roots list into a set.
1049 descendants = set(roots)
1049 descendants = set(roots)
1050 # Also, keep the original roots so we can filter out roots that aren't
1050 # Also, keep the original roots so we can filter out roots that aren't
1051 # 'real' roots (i.e. are descended from other roots).
1051 # 'real' roots (i.e. are descended from other roots).
1052 roots = descendants.copy()
1052 roots = descendants.copy()
1053 # Our topologically sorted list of output nodes.
1053 # Our topologically sorted list of output nodes.
1054 orderedout = []
1054 orderedout = []
1055 # Don't start at nullid since we don't want nullid in our output list,
1055 # Don't start at nullid since we don't want nullid in our output list,
1056 # and if nullid shows up in descendants, empty parents will look like
1056 # and if nullid shows up in descendants, empty parents will look like
1057 # they're descendants.
1057 # they're descendants.
1058 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1058 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1059 n = self.node(r)
1059 n = self.node(r)
1060 isdescendant = False
1060 isdescendant = False
1061 if lowestrev == nullrev: # Everybody is a descendant of nullid
1061 if lowestrev == nullrev: # Everybody is a descendant of nullid
1062 isdescendant = True
1062 isdescendant = True
1063 elif n in descendants:
1063 elif n in descendants:
1064 # n is already a descendant
1064 # n is already a descendant
1065 isdescendant = True
1065 isdescendant = True
1066 # This check only needs to be done here because all the roots
1066 # This check only needs to be done here because all the roots
1067 # will start being marked is descendants before the loop.
1067 # will start being marked is descendants before the loop.
1068 if n in roots:
1068 if n in roots:
1069 # If n was a root, check if it's a 'real' root.
1069 # If n was a root, check if it's a 'real' root.
1070 p = tuple(self.parents(n))
1070 p = tuple(self.parents(n))
1071 # If any of its parents are descendants, it's not a root.
1071 # If any of its parents are descendants, it's not a root.
1072 if (p[0] in descendants) or (p[1] in descendants):
1072 if (p[0] in descendants) or (p[1] in descendants):
1073 roots.remove(n)
1073 roots.remove(n)
1074 else:
1074 else:
1075 p = tuple(self.parents(n))
1075 p = tuple(self.parents(n))
1076 # A node is a descendant if either of its parents are
1076 # A node is a descendant if either of its parents are
1077 # descendants. (We seeded the dependents list with the roots
1077 # descendants. (We seeded the dependents list with the roots
1078 # up there, remember?)
1078 # up there, remember?)
1079 if (p[0] in descendants) or (p[1] in descendants):
1079 if (p[0] in descendants) or (p[1] in descendants):
1080 descendants.add(n)
1080 descendants.add(n)
1081 isdescendant = True
1081 isdescendant = True
1082 if isdescendant and ((ancestors is None) or (n in ancestors)):
1082 if isdescendant and ((ancestors is None) or (n in ancestors)):
1083 # Only include nodes that are both descendants and ancestors.
1083 # Only include nodes that are both descendants and ancestors.
1084 orderedout.append(n)
1084 orderedout.append(n)
1085 if (ancestors is not None) and (n in heads):
1085 if (ancestors is not None) and (n in heads):
1086 # We're trying to figure out which heads are reachable
1086 # We're trying to figure out which heads are reachable
1087 # from roots.
1087 # from roots.
1088 # Mark this head as having been reached
1088 # Mark this head as having been reached
1089 heads[n] = True
1089 heads[n] = True
1090 elif ancestors is None:
1090 elif ancestors is None:
1091 # Otherwise, we're trying to discover the heads.
1091 # Otherwise, we're trying to discover the heads.
1092 # Assume this is a head because if it isn't, the next step
1092 # Assume this is a head because if it isn't, the next step
1093 # will eventually remove it.
1093 # will eventually remove it.
1094 heads[n] = True
1094 heads[n] = True
1095 # But, obviously its parents aren't.
1095 # But, obviously its parents aren't.
1096 for p in self.parents(n):
1096 for p in self.parents(n):
1097 heads.pop(p, None)
1097 heads.pop(p, None)
1098 heads = [head for head, flag in heads.iteritems() if flag]
1098 heads = [head for head, flag in heads.iteritems() if flag]
1099 roots = list(roots)
1099 roots = list(roots)
1100 assert orderedout
1100 assert orderedout
1101 assert roots
1101 assert roots
1102 assert heads
1102 assert heads
1103 return (orderedout, roots, heads)
1103 return (orderedout, roots, heads)
1104
1104
1105 def headrevs(self):
1105 def headrevs(self, revs=None):
1106 try:
1106 if revs is None:
1107 return self.index.headrevs()
1107 try:
1108 except AttributeError:
1108 return self.index.headrevs()
1109 return self._headrevs()
1109 except AttributeError:
1110 return self._headrevs()
1111 return dagop.headrevs(revs, self.parentrevs)
1110
1112
1111 def computephases(self, roots):
1113 def computephases(self, roots):
1112 return self.index.computephasesmapsets(roots)
1114 return self.index.computephasesmapsets(roots)
1113
1115
1114 def _headrevs(self):
1116 def _headrevs(self):
1115 count = len(self)
1117 count = len(self)
1116 if not count:
1118 if not count:
1117 return [nullrev]
1119 return [nullrev]
1118 # we won't iter over filtered rev so nobody is a head at start
1120 # we won't iter over filtered rev so nobody is a head at start
1119 ishead = [0] * (count + 1)
1121 ishead = [0] * (count + 1)
1120 index = self.index
1122 index = self.index
1121 for r in self:
1123 for r in self:
1122 ishead[r] = 1 # I may be an head
1124 ishead[r] = 1 # I may be an head
1123 e = index[r]
1125 e = index[r]
1124 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1126 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1125 return [r for r, val in enumerate(ishead) if val]
1127 return [r for r, val in enumerate(ishead) if val]
1126
1128
1127 def heads(self, start=None, stop=None):
1129 def heads(self, start=None, stop=None):
1128 """return the list of all nodes that have no children
1130 """return the list of all nodes that have no children
1129
1131
1130 if start is specified, only heads that are descendants of
1132 if start is specified, only heads that are descendants of
1131 start will be returned
1133 start will be returned
1132 if stop is specified, it will consider all the revs from stop
1134 if stop is specified, it will consider all the revs from stop
1133 as if they had no children
1135 as if they had no children
1134 """
1136 """
1135 if start is None and stop is None:
1137 if start is None and stop is None:
1136 if not len(self):
1138 if not len(self):
1137 return [nullid]
1139 return [nullid]
1138 return [self.node(r) for r in self.headrevs()]
1140 return [self.node(r) for r in self.headrevs()]
1139
1141
1140 if start is None:
1142 if start is None:
1141 start = nullrev
1143 start = nullrev
1142 else:
1144 else:
1143 start = self.rev(start)
1145 start = self.rev(start)
1144
1146
1145 stoprevs = set(self.rev(n) for n in stop or [])
1147 stoprevs = set(self.rev(n) for n in stop or [])
1146
1148
1147 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1149 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1148 stoprevs=stoprevs)
1150 stoprevs=stoprevs)
1149
1151
1150 return [self.node(rev) for rev in revs]
1152 return [self.node(rev) for rev in revs]
1151
1153
1152 def children(self, node):
1154 def children(self, node):
1153 """find the children of a given node"""
1155 """find the children of a given node"""
1154 c = []
1156 c = []
1155 p = self.rev(node)
1157 p = self.rev(node)
1156 for r in self.revs(start=p + 1):
1158 for r in self.revs(start=p + 1):
1157 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1159 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1158 if prevs:
1160 if prevs:
1159 for pr in prevs:
1161 for pr in prevs:
1160 if pr == p:
1162 if pr == p:
1161 c.append(self.node(r))
1163 c.append(self.node(r))
1162 elif p == nullrev:
1164 elif p == nullrev:
1163 c.append(self.node(r))
1165 c.append(self.node(r))
1164 return c
1166 return c
1165
1167
1166 def commonancestorsheads(self, a, b):
1168 def commonancestorsheads(self, a, b):
1167 """calculate all the heads of the common ancestors of nodes a and b"""
1169 """calculate all the heads of the common ancestors of nodes a and b"""
1168 a, b = self.rev(a), self.rev(b)
1170 a, b = self.rev(a), self.rev(b)
1169 ancs = self._commonancestorsheads(a, b)
1171 ancs = self._commonancestorsheads(a, b)
1170 return pycompat.maplist(self.node, ancs)
1172 return pycompat.maplist(self.node, ancs)
1171
1173
1172 def _commonancestorsheads(self, *revs):
1174 def _commonancestorsheads(self, *revs):
1173 """calculate all the heads of the common ancestors of revs"""
1175 """calculate all the heads of the common ancestors of revs"""
1174 try:
1176 try:
1175 ancs = self.index.commonancestorsheads(*revs)
1177 ancs = self.index.commonancestorsheads(*revs)
1176 except (AttributeError, OverflowError): # C implementation failed
1178 except (AttributeError, OverflowError): # C implementation failed
1177 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1179 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1178 return ancs
1180 return ancs
1179
1181
1180 def isancestor(self, a, b):
1182 def isancestor(self, a, b):
1181 """return True if node a is an ancestor of node b
1183 """return True if node a is an ancestor of node b
1182
1184
1183 A revision is considered an ancestor of itself."""
1185 A revision is considered an ancestor of itself."""
1184 a, b = self.rev(a), self.rev(b)
1186 a, b = self.rev(a), self.rev(b)
1185 return self.isancestorrev(a, b)
1187 return self.isancestorrev(a, b)
1186
1188
1187 def isancestorrev(self, a, b):
1189 def isancestorrev(self, a, b):
1188 """return True if revision a is an ancestor of revision b
1190 """return True if revision a is an ancestor of revision b
1189
1191
1190 A revision is considered an ancestor of itself.
1192 A revision is considered an ancestor of itself.
1191
1193
1192 The implementation of this is trivial but the use of
1194 The implementation of this is trivial but the use of
1193 commonancestorsheads is not."""
1195 commonancestorsheads is not."""
1194 if a == nullrev:
1196 if a == nullrev:
1195 return True
1197 return True
1196 elif a == b:
1198 elif a == b:
1197 return True
1199 return True
1198 elif a > b:
1200 elif a > b:
1199 return False
1201 return False
1200 return a in self._commonancestorsheads(a, b)
1202 return a in self._commonancestorsheads(a, b)
1201
1203
1202 def ancestor(self, a, b):
1204 def ancestor(self, a, b):
1203 """calculate the "best" common ancestor of nodes a and b"""
1205 """calculate the "best" common ancestor of nodes a and b"""
1204
1206
1205 a, b = self.rev(a), self.rev(b)
1207 a, b = self.rev(a), self.rev(b)
1206 try:
1208 try:
1207 ancs = self.index.ancestors(a, b)
1209 ancs = self.index.ancestors(a, b)
1208 except (AttributeError, OverflowError):
1210 except (AttributeError, OverflowError):
1209 ancs = ancestor.ancestors(self.parentrevs, a, b)
1211 ancs = ancestor.ancestors(self.parentrevs, a, b)
1210 if ancs:
1212 if ancs:
1211 # choose a consistent winner when there's a tie
1213 # choose a consistent winner when there's a tie
1212 return min(map(self.node, ancs))
1214 return min(map(self.node, ancs))
1213 return nullid
1215 return nullid
1214
1216
1215 def _match(self, id):
1217 def _match(self, id):
1216 if isinstance(id, int):
1218 if isinstance(id, int):
1217 # rev
1219 # rev
1218 return self.node(id)
1220 return self.node(id)
1219 if len(id) == 20:
1221 if len(id) == 20:
1220 # possibly a binary node
1222 # possibly a binary node
1221 # odds of a binary node being all hex in ASCII are 1 in 10**25
1223 # odds of a binary node being all hex in ASCII are 1 in 10**25
1222 try:
1224 try:
1223 node = id
1225 node = id
1224 self.rev(node) # quick search the index
1226 self.rev(node) # quick search the index
1225 return node
1227 return node
1226 except error.LookupError:
1228 except error.LookupError:
1227 pass # may be partial hex id
1229 pass # may be partial hex id
1228 try:
1230 try:
1229 # str(rev)
1231 # str(rev)
1230 rev = int(id)
1232 rev = int(id)
1231 if "%d" % rev != id:
1233 if "%d" % rev != id:
1232 raise ValueError
1234 raise ValueError
1233 if rev < 0:
1235 if rev < 0:
1234 rev = len(self) + rev
1236 rev = len(self) + rev
1235 if rev < 0 or rev >= len(self):
1237 if rev < 0 or rev >= len(self):
1236 raise ValueError
1238 raise ValueError
1237 return self.node(rev)
1239 return self.node(rev)
1238 except (ValueError, OverflowError):
1240 except (ValueError, OverflowError):
1239 pass
1241 pass
1240 if len(id) == 40:
1242 if len(id) == 40:
1241 try:
1243 try:
1242 # a full hex nodeid?
1244 # a full hex nodeid?
1243 node = bin(id)
1245 node = bin(id)
1244 self.rev(node)
1246 self.rev(node)
1245 return node
1247 return node
1246 except (TypeError, error.LookupError):
1248 except (TypeError, error.LookupError):
1247 pass
1249 pass
1248
1250
1249 def _partialmatch(self, id):
1251 def _partialmatch(self, id):
1250 # we don't care wdirfilenodeids as they should be always full hash
1252 # we don't care wdirfilenodeids as they should be always full hash
1251 maybewdir = wdirhex.startswith(id)
1253 maybewdir = wdirhex.startswith(id)
1252 try:
1254 try:
1253 partial = self.index.partialmatch(id)
1255 partial = self.index.partialmatch(id)
1254 if partial and self.hasnode(partial):
1256 if partial and self.hasnode(partial):
1255 if maybewdir:
1257 if maybewdir:
1256 # single 'ff...' match in radix tree, ambiguous with wdir
1258 # single 'ff...' match in radix tree, ambiguous with wdir
1257 raise error.RevlogError
1259 raise error.RevlogError
1258 return partial
1260 return partial
1259 if maybewdir:
1261 if maybewdir:
1260 # no 'ff...' match in radix tree, wdir identified
1262 # no 'ff...' match in radix tree, wdir identified
1261 raise error.WdirUnsupported
1263 raise error.WdirUnsupported
1262 return None
1264 return None
1263 except error.RevlogError:
1265 except error.RevlogError:
1264 # parsers.c radix tree lookup gave multiple matches
1266 # parsers.c radix tree lookup gave multiple matches
1265 # fast path: for unfiltered changelog, radix tree is accurate
1267 # fast path: for unfiltered changelog, radix tree is accurate
1266 if not getattr(self, 'filteredrevs', None):
1268 if not getattr(self, 'filteredrevs', None):
1267 raise error.AmbiguousPrefixLookupError(
1269 raise error.AmbiguousPrefixLookupError(
1268 id, self.indexfile, _('ambiguous identifier'))
1270 id, self.indexfile, _('ambiguous identifier'))
1269 # fall through to slow path that filters hidden revisions
1271 # fall through to slow path that filters hidden revisions
1270 except (AttributeError, ValueError):
1272 except (AttributeError, ValueError):
1271 # we are pure python, or key was too short to search radix tree
1273 # we are pure python, or key was too short to search radix tree
1272 pass
1274 pass
1273
1275
1274 if id in self._pcache:
1276 if id in self._pcache:
1275 return self._pcache[id]
1277 return self._pcache[id]
1276
1278
1277 if len(id) <= 40:
1279 if len(id) <= 40:
1278 try:
1280 try:
1279 # hex(node)[:...]
1281 # hex(node)[:...]
1280 l = len(id) // 2 # grab an even number of digits
1282 l = len(id) // 2 # grab an even number of digits
1281 prefix = bin(id[:l * 2])
1283 prefix = bin(id[:l * 2])
1282 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1284 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1283 nl = [n for n in nl if hex(n).startswith(id) and
1285 nl = [n for n in nl if hex(n).startswith(id) and
1284 self.hasnode(n)]
1286 self.hasnode(n)]
1285 if nullhex.startswith(id):
1287 if nullhex.startswith(id):
1286 nl.append(nullid)
1288 nl.append(nullid)
1287 if len(nl) > 0:
1289 if len(nl) > 0:
1288 if len(nl) == 1 and not maybewdir:
1290 if len(nl) == 1 and not maybewdir:
1289 self._pcache[id] = nl[0]
1291 self._pcache[id] = nl[0]
1290 return nl[0]
1292 return nl[0]
1291 raise error.AmbiguousPrefixLookupError(
1293 raise error.AmbiguousPrefixLookupError(
1292 id, self.indexfile, _('ambiguous identifier'))
1294 id, self.indexfile, _('ambiguous identifier'))
1293 if maybewdir:
1295 if maybewdir:
1294 raise error.WdirUnsupported
1296 raise error.WdirUnsupported
1295 return None
1297 return None
1296 except TypeError:
1298 except TypeError:
1297 pass
1299 pass
1298
1300
1299 def lookup(self, id):
1301 def lookup(self, id):
1300 """locate a node based on:
1302 """locate a node based on:
1301 - revision number or str(revision number)
1303 - revision number or str(revision number)
1302 - nodeid or subset of hex nodeid
1304 - nodeid or subset of hex nodeid
1303 """
1305 """
1304 n = self._match(id)
1306 n = self._match(id)
1305 if n is not None:
1307 if n is not None:
1306 return n
1308 return n
1307 n = self._partialmatch(id)
1309 n = self._partialmatch(id)
1308 if n:
1310 if n:
1309 return n
1311 return n
1310
1312
1311 raise error.LookupError(id, self.indexfile, _('no match found'))
1313 raise error.LookupError(id, self.indexfile, _('no match found'))
1312
1314
1313 def shortest(self, node, minlength=1):
1315 def shortest(self, node, minlength=1):
1314 """Find the shortest unambiguous prefix that matches node."""
1316 """Find the shortest unambiguous prefix that matches node."""
1315 def isvalid(prefix):
1317 def isvalid(prefix):
1316 try:
1318 try:
1317 node = self._partialmatch(prefix)
1319 node = self._partialmatch(prefix)
1318 except error.AmbiguousPrefixLookupError:
1320 except error.AmbiguousPrefixLookupError:
1319 return False
1321 return False
1320 except error.WdirUnsupported:
1322 except error.WdirUnsupported:
1321 # single 'ff...' match
1323 # single 'ff...' match
1322 return True
1324 return True
1323 if node is None:
1325 if node is None:
1324 raise error.LookupError(node, self.indexfile, _('no node'))
1326 raise error.LookupError(node, self.indexfile, _('no node'))
1325 return True
1327 return True
1326
1328
1327 def maybewdir(prefix):
1329 def maybewdir(prefix):
1328 return all(c == 'f' for c in prefix)
1330 return all(c == 'f' for c in prefix)
1329
1331
1330 hexnode = hex(node)
1332 hexnode = hex(node)
1331
1333
1332 def disambiguate(hexnode, minlength):
1334 def disambiguate(hexnode, minlength):
1333 """Disambiguate against wdirid."""
1335 """Disambiguate against wdirid."""
1334 for length in range(minlength, 41):
1336 for length in range(minlength, 41):
1335 prefix = hexnode[:length]
1337 prefix = hexnode[:length]
1336 if not maybewdir(prefix):
1338 if not maybewdir(prefix):
1337 return prefix
1339 return prefix
1338
1340
1339 if not getattr(self, 'filteredrevs', None):
1341 if not getattr(self, 'filteredrevs', None):
1340 try:
1342 try:
1341 length = max(self.index.shortest(node), minlength)
1343 length = max(self.index.shortest(node), minlength)
1342 return disambiguate(hexnode, length)
1344 return disambiguate(hexnode, length)
1343 except error.RevlogError:
1345 except error.RevlogError:
1344 if node != wdirid:
1346 if node != wdirid:
1345 raise error.LookupError(node, self.indexfile, _('no node'))
1347 raise error.LookupError(node, self.indexfile, _('no node'))
1346 except AttributeError:
1348 except AttributeError:
1347 # Fall through to pure code
1349 # Fall through to pure code
1348 pass
1350 pass
1349
1351
1350 if node == wdirid:
1352 if node == wdirid:
1351 for length in range(minlength, 41):
1353 for length in range(minlength, 41):
1352 prefix = hexnode[:length]
1354 prefix = hexnode[:length]
1353 if isvalid(prefix):
1355 if isvalid(prefix):
1354 return prefix
1356 return prefix
1355
1357
1356 for length in range(minlength, 41):
1358 for length in range(minlength, 41):
1357 prefix = hexnode[:length]
1359 prefix = hexnode[:length]
1358 if isvalid(prefix):
1360 if isvalid(prefix):
1359 return disambiguate(hexnode, length)
1361 return disambiguate(hexnode, length)
1360
1362
1361 def cmp(self, node, text):
1363 def cmp(self, node, text):
1362 """compare text with a given file revision
1364 """compare text with a given file revision
1363
1365
1364 returns True if text is different than what is stored.
1366 returns True if text is different than what is stored.
1365 """
1367 """
1366 p1, p2 = self.parents(node)
1368 p1, p2 = self.parents(node)
1367 return storageutil.hashrevisionsha1(text, p1, p2) != node
1369 return storageutil.hashrevisionsha1(text, p1, p2) != node
1368
1370
1369 def _cachesegment(self, offset, data):
1371 def _cachesegment(self, offset, data):
1370 """Add a segment to the revlog cache.
1372 """Add a segment to the revlog cache.
1371
1373
1372 Accepts an absolute offset and the data that is at that location.
1374 Accepts an absolute offset and the data that is at that location.
1373 """
1375 """
1374 o, d = self._chunkcache
1376 o, d = self._chunkcache
1375 # try to add to existing cache
1377 # try to add to existing cache
1376 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1378 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1377 self._chunkcache = o, d + data
1379 self._chunkcache = o, d + data
1378 else:
1380 else:
1379 self._chunkcache = offset, data
1381 self._chunkcache = offset, data
1380
1382
1381 def _readsegment(self, offset, length, df=None):
1383 def _readsegment(self, offset, length, df=None):
1382 """Load a segment of raw data from the revlog.
1384 """Load a segment of raw data from the revlog.
1383
1385
1384 Accepts an absolute offset, length to read, and an optional existing
1386 Accepts an absolute offset, length to read, and an optional existing
1385 file handle to read from.
1387 file handle to read from.
1386
1388
1387 If an existing file handle is passed, it will be seeked and the
1389 If an existing file handle is passed, it will be seeked and the
1388 original seek position will NOT be restored.
1390 original seek position will NOT be restored.
1389
1391
1390 Returns a str or buffer of raw byte data.
1392 Returns a str or buffer of raw byte data.
1391
1393
1392 Raises if the requested number of bytes could not be read.
1394 Raises if the requested number of bytes could not be read.
1393 """
1395 """
1394 # Cache data both forward and backward around the requested
1396 # Cache data both forward and backward around the requested
1395 # data, in a fixed size window. This helps speed up operations
1397 # data, in a fixed size window. This helps speed up operations
1396 # involving reading the revlog backwards.
1398 # involving reading the revlog backwards.
1397 cachesize = self._chunkcachesize
1399 cachesize = self._chunkcachesize
1398 realoffset = offset & ~(cachesize - 1)
1400 realoffset = offset & ~(cachesize - 1)
1399 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1401 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1400 - realoffset)
1402 - realoffset)
1401 with self._datareadfp(df) as df:
1403 with self._datareadfp(df) as df:
1402 df.seek(realoffset)
1404 df.seek(realoffset)
1403 d = df.read(reallength)
1405 d = df.read(reallength)
1404
1406
1405 self._cachesegment(realoffset, d)
1407 self._cachesegment(realoffset, d)
1406 if offset != realoffset or reallength != length:
1408 if offset != realoffset or reallength != length:
1407 startoffset = offset - realoffset
1409 startoffset = offset - realoffset
1408 if len(d) - startoffset < length:
1410 if len(d) - startoffset < length:
1409 raise error.RevlogError(
1411 raise error.RevlogError(
1410 _('partial read of revlog %s; expected %d bytes from '
1412 _('partial read of revlog %s; expected %d bytes from '
1411 'offset %d, got %d') %
1413 'offset %d, got %d') %
1412 (self.indexfile if self._inline else self.datafile,
1414 (self.indexfile if self._inline else self.datafile,
1413 length, realoffset, len(d) - startoffset))
1415 length, realoffset, len(d) - startoffset))
1414
1416
1415 return util.buffer(d, startoffset, length)
1417 return util.buffer(d, startoffset, length)
1416
1418
1417 if len(d) < length:
1419 if len(d) < length:
1418 raise error.RevlogError(
1420 raise error.RevlogError(
1419 _('partial read of revlog %s; expected %d bytes from offset '
1421 _('partial read of revlog %s; expected %d bytes from offset '
1420 '%d, got %d') %
1422 '%d, got %d') %
1421 (self.indexfile if self._inline else self.datafile,
1423 (self.indexfile if self._inline else self.datafile,
1422 length, offset, len(d)))
1424 length, offset, len(d)))
1423
1425
1424 return d
1426 return d
1425
1427
1426 def _getsegment(self, offset, length, df=None):
1428 def _getsegment(self, offset, length, df=None):
1427 """Obtain a segment of raw data from the revlog.
1429 """Obtain a segment of raw data from the revlog.
1428
1430
1429 Accepts an absolute offset, length of bytes to obtain, and an
1431 Accepts an absolute offset, length of bytes to obtain, and an
1430 optional file handle to the already-opened revlog. If the file
1432 optional file handle to the already-opened revlog. If the file
1431 handle is used, it's original seek position will not be preserved.
1433 handle is used, it's original seek position will not be preserved.
1432
1434
1433 Requests for data may be returned from a cache.
1435 Requests for data may be returned from a cache.
1434
1436
1435 Returns a str or a buffer instance of raw byte data.
1437 Returns a str or a buffer instance of raw byte data.
1436 """
1438 """
1437 o, d = self._chunkcache
1439 o, d = self._chunkcache
1438 l = len(d)
1440 l = len(d)
1439
1441
1440 # is it in the cache?
1442 # is it in the cache?
1441 cachestart = offset - o
1443 cachestart = offset - o
1442 cacheend = cachestart + length
1444 cacheend = cachestart + length
1443 if cachestart >= 0 and cacheend <= l:
1445 if cachestart >= 0 and cacheend <= l:
1444 if cachestart == 0 and cacheend == l:
1446 if cachestart == 0 and cacheend == l:
1445 return d # avoid a copy
1447 return d # avoid a copy
1446 return util.buffer(d, cachestart, cacheend - cachestart)
1448 return util.buffer(d, cachestart, cacheend - cachestart)
1447
1449
1448 return self._readsegment(offset, length, df=df)
1450 return self._readsegment(offset, length, df=df)
1449
1451
1450 def _getsegmentforrevs(self, startrev, endrev, df=None):
1452 def _getsegmentforrevs(self, startrev, endrev, df=None):
1451 """Obtain a segment of raw data corresponding to a range of revisions.
1453 """Obtain a segment of raw data corresponding to a range of revisions.
1452
1454
1453 Accepts the start and end revisions and an optional already-open
1455 Accepts the start and end revisions and an optional already-open
1454 file handle to be used for reading. If the file handle is read, its
1456 file handle to be used for reading. If the file handle is read, its
1455 seek position will not be preserved.
1457 seek position will not be preserved.
1456
1458
1457 Requests for data may be satisfied by a cache.
1459 Requests for data may be satisfied by a cache.
1458
1460
1459 Returns a 2-tuple of (offset, data) for the requested range of
1461 Returns a 2-tuple of (offset, data) for the requested range of
1460 revisions. Offset is the integer offset from the beginning of the
1462 revisions. Offset is the integer offset from the beginning of the
1461 revlog and data is a str or buffer of the raw byte data.
1463 revlog and data is a str or buffer of the raw byte data.
1462
1464
1463 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1465 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1464 to determine where each revision's data begins and ends.
1466 to determine where each revision's data begins and ends.
1465 """
1467 """
1466 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1468 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1467 # (functions are expensive).
1469 # (functions are expensive).
1468 index = self.index
1470 index = self.index
1469 istart = index[startrev]
1471 istart = index[startrev]
1470 start = int(istart[0] >> 16)
1472 start = int(istart[0] >> 16)
1471 if startrev == endrev:
1473 if startrev == endrev:
1472 end = start + istart[1]
1474 end = start + istart[1]
1473 else:
1475 else:
1474 iend = index[endrev]
1476 iend = index[endrev]
1475 end = int(iend[0] >> 16) + iend[1]
1477 end = int(iend[0] >> 16) + iend[1]
1476
1478
1477 if self._inline:
1479 if self._inline:
1478 start += (startrev + 1) * self._io.size
1480 start += (startrev + 1) * self._io.size
1479 end += (endrev + 1) * self._io.size
1481 end += (endrev + 1) * self._io.size
1480 length = end - start
1482 length = end - start
1481
1483
1482 return start, self._getsegment(start, length, df=df)
1484 return start, self._getsegment(start, length, df=df)
1483
1485
1484 def _chunk(self, rev, df=None):
1486 def _chunk(self, rev, df=None):
1485 """Obtain a single decompressed chunk for a revision.
1487 """Obtain a single decompressed chunk for a revision.
1486
1488
1487 Accepts an integer revision and an optional already-open file handle
1489 Accepts an integer revision and an optional already-open file handle
1488 to be used for reading. If used, the seek position of the file will not
1490 to be used for reading. If used, the seek position of the file will not
1489 be preserved.
1491 be preserved.
1490
1492
1491 Returns a str holding uncompressed data for the requested revision.
1493 Returns a str holding uncompressed data for the requested revision.
1492 """
1494 """
1493 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1495 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1494
1496
1495 def _chunks(self, revs, df=None, targetsize=None):
1497 def _chunks(self, revs, df=None, targetsize=None):
1496 """Obtain decompressed chunks for the specified revisions.
1498 """Obtain decompressed chunks for the specified revisions.
1497
1499
1498 Accepts an iterable of numeric revisions that are assumed to be in
1500 Accepts an iterable of numeric revisions that are assumed to be in
1499 ascending order. Also accepts an optional already-open file handle
1501 ascending order. Also accepts an optional already-open file handle
1500 to be used for reading. If used, the seek position of the file will
1502 to be used for reading. If used, the seek position of the file will
1501 not be preserved.
1503 not be preserved.
1502
1504
1503 This function is similar to calling ``self._chunk()`` multiple times,
1505 This function is similar to calling ``self._chunk()`` multiple times,
1504 but is faster.
1506 but is faster.
1505
1507
1506 Returns a list with decompressed data for each requested revision.
1508 Returns a list with decompressed data for each requested revision.
1507 """
1509 """
1508 if not revs:
1510 if not revs:
1509 return []
1511 return []
1510 start = self.start
1512 start = self.start
1511 length = self.length
1513 length = self.length
1512 inline = self._inline
1514 inline = self._inline
1513 iosize = self._io.size
1515 iosize = self._io.size
1514 buffer = util.buffer
1516 buffer = util.buffer
1515
1517
1516 l = []
1518 l = []
1517 ladd = l.append
1519 ladd = l.append
1518
1520
1519 if not self._withsparseread:
1521 if not self._withsparseread:
1520 slicedchunks = (revs,)
1522 slicedchunks = (revs,)
1521 else:
1523 else:
1522 slicedchunks = deltautil.slicechunk(self, revs,
1524 slicedchunks = deltautil.slicechunk(self, revs,
1523 targetsize=targetsize)
1525 targetsize=targetsize)
1524
1526
1525 for revschunk in slicedchunks:
1527 for revschunk in slicedchunks:
1526 firstrev = revschunk[0]
1528 firstrev = revschunk[0]
1527 # Skip trailing revisions with empty diff
1529 # Skip trailing revisions with empty diff
1528 for lastrev in revschunk[::-1]:
1530 for lastrev in revschunk[::-1]:
1529 if length(lastrev) != 0:
1531 if length(lastrev) != 0:
1530 break
1532 break
1531
1533
1532 try:
1534 try:
1533 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1535 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1534 except OverflowError:
1536 except OverflowError:
1535 # issue4215 - we can't cache a run of chunks greater than
1537 # issue4215 - we can't cache a run of chunks greater than
1536 # 2G on Windows
1538 # 2G on Windows
1537 return [self._chunk(rev, df=df) for rev in revschunk]
1539 return [self._chunk(rev, df=df) for rev in revschunk]
1538
1540
1539 decomp = self.decompress
1541 decomp = self.decompress
1540 for rev in revschunk:
1542 for rev in revschunk:
1541 chunkstart = start(rev)
1543 chunkstart = start(rev)
1542 if inline:
1544 if inline:
1543 chunkstart += (rev + 1) * iosize
1545 chunkstart += (rev + 1) * iosize
1544 chunklength = length(rev)
1546 chunklength = length(rev)
1545 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1547 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1546
1548
1547 return l
1549 return l
1548
1550
1549 def _chunkclear(self):
1551 def _chunkclear(self):
1550 """Clear the raw chunk cache."""
1552 """Clear the raw chunk cache."""
1551 self._chunkcache = (0, '')
1553 self._chunkcache = (0, '')
1552
1554
def deltaparent(self, rev):
    """Return the revision whose text this revision's delta applies to.

    A revision stored as a full text (its delta base is itself) has no
    delta parent, yielding ``nullrev``.
    """
    deltabase = self.index[rev][3]
    if deltabase == rev:
        # stored as a full text, not as a delta
        return nullrev
    if self._generaldelta:
        return deltabase
    # without generaldelta, deltas are always against the previous rev
    return rev - 1
1562
1564
def issnapshot(self, rev):
    """Tell whether ``rev`` is stored as a snapshot."""
    if not self._sparserevlog:
        # classic delta chains: only full texts count as snapshots
        return self.deltaparent(rev) == nullrev
    if util.safehasattr(self.index, 'issnapshot'):
        # directly assign the method to cache the testing and access
        self.issnapshot = self.index.issnapshot
        return self.issnapshot(rev)
    if rev == nullrev:
        return True
    entry = self.index[rev]
    base = entry[3]
    if base == rev or base == nullrev:
        # a full text, or a delta directly against the null revision
        return True
    if base in (entry[5], entry[6]):
        # delta against a parent revision: a plain delta, not a snapshot
        return False
    # delta against a non-parent: part of an intermediate snapshot chain
    return self.issnapshot(base)
1585
1587
def snapshotdepth(self, rev):
    """Return the number of snapshots in the chain before this one.

    Raises ``error.ProgrammingError`` if ``rev`` is not a snapshot.
    """
    if not self.issnapshot(rev):
        # Bug fix: the '%d' placeholder was never filled in, so the
        # error message literally contained "%d"; interpolate rev.
        raise error.ProgrammingError('revision %d not a snapshot' % rev)
    # the delta chain of a snapshot contains only snapshots
    return len(self._deltachain(rev)[0]) - 1
1591
1593
def revdiff(self, rev1, rev2):
    """Return or calculate a delta between two revisions.

    The delta calculated is in binary form and is intended to be written
    to revlog data directly, so this function needs raw revision data.
    """
    # Fast path: the delta stored for rev2 is already against rev1.
    if rev1 != nullrev and self.deltaparent(rev2) == rev1:
        return bytes(self._chunk(rev2))

    # Otherwise compute a fresh binary diff of the two raw texts.
    rawtext1 = self.revision(rev1, raw=True)
    rawtext2 = self.revision(rev2, raw=True)
    return mdiff.textdiff(rawtext1, rawtext2)
1603
1605
def revision(self, nodeorrev, _df=None, raw=False):
    """Return an uncompressed revision of a given node or revision
    number.

    ``_df`` - an existing file handle to read from (internal-only).
    ``raw`` - whether the revision data is to be treated as raw data
    when applying flag transforms; set to True when generating
    changegroups or in debug commands.
    """
    # Accept either an integer revision or a node; resolve the other
    # form lazily since the lookup is not free.
    if isinstance(nodeorrev, int):
        rev = nodeorrev
        node = self.node(rev)
    else:
        node = nodeorrev
        rev = None

    if node == nullid:
        return ""

    cachedrev = None
    flags = None
    rawtext = None
    cache = self._revisioncache
    if cache:
        if cache[0] == node:
            # the cache only stores the raw text
            if raw:
                return cache[2]
            # duplicated lookups, but good for perf
            if rev is None:
                rev = self.rev(node)
            if flags is None:
                flags = self.flags(rev)
            # no extra flags set -> no flag processor runs -> text == rawtext
            if flags == REVIDX_DEFAULT_FLAGS:
                return cache[2]
            # rawtext is reusable, but the flag processors must run
            rawtext = cache[2]

        cachedrev = cache[1]

    # look up what we need to read
    if rawtext is None:
        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            # the chain walk stopped at the cached revision; its raw
            # text is the base the remaining deltas apply to
            rawtext = self._revisioncache[2]

        # drop cache to save memory
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            # heuristic cap on memory used while reading the chain
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if rawtext is None:
            # first chunk of the chain is a full text; the rest are deltas
            rawtext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(rawtext, bins)
        self._revisioncache = (node, rev, rawtext)

    if flags is None:
        if rev is None:
            rev = self.rev(node)
        flags = self.flags(rev)

    text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
    if validatehash:
        self.checkhash(text, node, rev=rev)

    return text
1678
1680
def hash(self, text, p1, p2):
    """Compute a node hash.

    Exposed as a method so that subclasses can substitute a different
    hashing scheme as needed.
    """
    digest = storageutil.hashrevisionsha1
    return digest(text, p1, p2)
1686
1688
1687 def _processflags(self, text, flags, operation, raw=False):
1689 def _processflags(self, text, flags, operation, raw=False):
1688 """Inspect revision data flags and applies transforms defined by
1690 """Inspect revision data flags and applies transforms defined by
1689 registered flag processors.
1691 registered flag processors.
1690
1692
1691 ``text`` - the revision data to process
1693 ``text`` - the revision data to process
1692 ``flags`` - the revision flags
1694 ``flags`` - the revision flags
1693 ``operation`` - the operation being performed (read or write)
1695 ``operation`` - the operation being performed (read or write)
1694 ``raw`` - an optional argument describing if the raw transform should be
1696 ``raw`` - an optional argument describing if the raw transform should be
1695 applied.
1697 applied.
1696
1698
1697 This method processes the flags in the order (or reverse order if
1699 This method processes the flags in the order (or reverse order if
1698 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1700 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1699 flag processors registered for present flags. The order of flags defined
1701 flag processors registered for present flags. The order of flags defined
1700 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1702 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1701
1703
1702 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1704 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1703 processed text and ``validatehash`` is a bool indicating whether the
1705 processed text and ``validatehash`` is a bool indicating whether the
1704 returned text should be checked for hash integrity.
1706 returned text should be checked for hash integrity.
1705
1707
1706 Note: If the ``raw`` argument is set, it has precedence over the
1708 Note: If the ``raw`` argument is set, it has precedence over the
1707 operation and will only update the value of ``validatehash``.
1709 operation and will only update the value of ``validatehash``.
1708 """
1710 """
1709 # fast path: no flag processors will run
1711 # fast path: no flag processors will run
1710 if flags == 0:
1712 if flags == 0:
1711 return text, True
1713 return text, True
1712 if not operation in ('read', 'write'):
1714 if not operation in ('read', 'write'):
1713 raise error.ProgrammingError(_("invalid '%s' operation") %
1715 raise error.ProgrammingError(_("invalid '%s' operation") %
1714 operation)
1716 operation)
1715 # Check all flags are known.
1717 # Check all flags are known.
1716 if flags & ~REVIDX_KNOWN_FLAGS:
1718 if flags & ~REVIDX_KNOWN_FLAGS:
1717 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1719 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1718 (flags & ~REVIDX_KNOWN_FLAGS))
1720 (flags & ~REVIDX_KNOWN_FLAGS))
1719 validatehash = True
1721 validatehash = True
1720 # Depending on the operation (read or write), the order might be
1722 # Depending on the operation (read or write), the order might be
1721 # reversed due to non-commutative transforms.
1723 # reversed due to non-commutative transforms.
1722 orderedflags = REVIDX_FLAGS_ORDER
1724 orderedflags = REVIDX_FLAGS_ORDER
1723 if operation == 'write':
1725 if operation == 'write':
1724 orderedflags = reversed(orderedflags)
1726 orderedflags = reversed(orderedflags)
1725
1727
1726 for flag in orderedflags:
1728 for flag in orderedflags:
1727 # If a flagprocessor has been registered for a known flag, apply the
1729 # If a flagprocessor has been registered for a known flag, apply the
1728 # related operation transform and update result tuple.
1730 # related operation transform and update result tuple.
1729 if flag & flags:
1731 if flag & flags:
1730 vhash = True
1732 vhash = True
1731
1733
1732 if flag not in self._flagprocessors:
1734 if flag not in self._flagprocessors:
1733 message = _("missing processor for flag '%#x'") % (flag)
1735 message = _("missing processor for flag '%#x'") % (flag)
1734 raise error.RevlogError(message)
1736 raise error.RevlogError(message)
1735
1737
1736 processor = self._flagprocessors[flag]
1738 processor = self._flagprocessors[flag]
1737 if processor is not None:
1739 if processor is not None:
1738 readtransform, writetransform, rawtransform = processor
1740 readtransform, writetransform, rawtransform = processor
1739
1741
1740 if raw:
1742 if raw:
1741 vhash = rawtransform(self, text)
1743 vhash = rawtransform(self, text)
1742 elif operation == 'read':
1744 elif operation == 'read':
1743 text, vhash = readtransform(self, text)
1745 text, vhash = readtransform(self, text)
1744 else: # write operation
1746 else: # write operation
1745 text, vhash = writetransform(self, text)
1747 text, vhash = writetransform(self, text)
1746 validatehash = validatehash and vhash
1748 validatehash = validatehash and vhash
1747
1749
1748 return text, validatehash
1750 return text, validatehash
1749
1751
def checkhash(self, text, node, p1=None, p2=None, rev=None):
    """Check node hash integrity.

    Exposed as a method so that subclasses can extend hash-mismatch
    behavior as needed.
    """
    try:
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node == self.hash(text, p1, p2):
            return

        # Hash mismatch. Drop the (raw-text) revision cache entry for
        # this node: we lose a potential cache hit, but teaching the
        # cache about verification state would be more work than this
        # rare case warrants.
        cache = self._revisioncache
        if cache and cache[0] == node:
            self._revisioncache = None

        revornode = rev
        if revornode is None:
            revornode = templatefilters.short(hex(node))
        raise error.RevlogError(_("integrity check failed on %s:%s")
                                % (self.indexfile,
                                   pycompat.bytestr(revornode)))
    except error.RevlogError:
        # A censored revision legitimately fails its hash check;
        # surface that as a dedicated error type.
        if self._censorable and storageutil.iscensoredtext(text):
            raise error.CensoredNodeError(self.indexfile, node, text)
        raise
1778
1780
1779 def _enforceinlinesize(self, tr, fp=None):
1781 def _enforceinlinesize(self, tr, fp=None):
1780 """Check if the revlog is too big for inline and convert if so.
1782 """Check if the revlog is too big for inline and convert if so.
1781
1783
1782 This should be called after revisions are added to the revlog. If the
1784 This should be called after revisions are added to the revlog. If the
1783 revlog has grown too large to be an inline revlog, it will convert it
1785 revlog has grown too large to be an inline revlog, it will convert it
1784 to use multiple index and data files.
1786 to use multiple index and data files.
1785 """
1787 """
1786 tiprev = len(self) - 1
1788 tiprev = len(self) - 1
1787 if (not self._inline or
1789 if (not self._inline or
1788 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1790 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1789 return
1791 return
1790
1792
1791 trinfo = tr.find(self.indexfile)
1793 trinfo = tr.find(self.indexfile)
1792 if trinfo is None:
1794 if trinfo is None:
1793 raise error.RevlogError(_("%s not found in the transaction")
1795 raise error.RevlogError(_("%s not found in the transaction")
1794 % self.indexfile)
1796 % self.indexfile)
1795
1797
1796 trindex = trinfo[2]
1798 trindex = trinfo[2]
1797 if trindex is not None:
1799 if trindex is not None:
1798 dataoff = self.start(trindex)
1800 dataoff = self.start(trindex)
1799 else:
1801 else:
1800 # revlog was stripped at start of transaction, use all leftover data
1802 # revlog was stripped at start of transaction, use all leftover data
1801 trindex = len(self) - 1
1803 trindex = len(self) - 1
1802 dataoff = self.end(tiprev)
1804 dataoff = self.end(tiprev)
1803
1805
1804 tr.add(self.datafile, dataoff)
1806 tr.add(self.datafile, dataoff)
1805
1807
1806 if fp:
1808 if fp:
1807 fp.flush()
1809 fp.flush()
1808 fp.close()
1810 fp.close()
1809 # We can't use the cached file handle after close(). So prevent
1811 # We can't use the cached file handle after close(). So prevent
1810 # its usage.
1812 # its usage.
1811 self._writinghandles = None
1813 self._writinghandles = None
1812
1814
1813 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1815 with self._indexfp('r') as ifh, self._datafp('w') as dfh:
1814 for r in self:
1816 for r in self:
1815 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1817 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
1816
1818
1817 with self._indexfp('w') as fp:
1819 with self._indexfp('w') as fp:
1818 self.version &= ~FLAG_INLINE_DATA
1820 self.version &= ~FLAG_INLINE_DATA
1819 self._inline = False
1821 self._inline = False
1820 io = self._io
1822 io = self._io
1821 for i in self:
1823 for i in self:
1822 e = io.packentry(self.index[i], self.node, self.version, i)
1824 e = io.packentry(self.index[i], self.node, self.version, i)
1823 fp.write(e)
1825 fp.write(e)
1824
1826
1825 # the temp file replace the real index when we exit the context
1827 # the temp file replace the real index when we exit the context
1826 # manager
1828 # manager
1827
1829
1828 tr.replace(self.indexfile, trindex * self._io.size)
1830 tr.replace(self.indexfile, trindex * self._io.size)
1829 self._chunkclear()
1831 self._chunkclear()
1830
1832
1831 def _nodeduplicatecallback(self, transaction, node):
1833 def _nodeduplicatecallback(self, transaction, node):
1832 """called when trying to add a node already stored.
1834 """called when trying to add a node already stored.
1833 """
1835 """
1834
1836
def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
    """add a revision to the log

    text - the revision data to add
    transaction - the transaction object used for rollback
    link - the linkrev data to add
    p1, p2 - the parent nodeids of the revision
    cachedelta - an optional precomputed delta
    node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use different hashing method (and override checkhash() in such case)
    flags - the known flags to set on the revision
    deltacomputer - an optional deltacomputer instance shared between
        multiple calls
    """
    if link == nullrev:
        raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                % self.indexfile)

    if flags:
        # a flagged revision's node must be computed from the
        # pre-transform text
        node = node or self.hash(text, p1, p2)

    rawtext, validatehash = self._processflags(text, flags, 'write')

    # If the flag processor modifies the revision data, ignore any provided
    # cachedelta.
    if rawtext != text:
        cachedelta = None

    rawtextlen = len(rawtext)
    if rawtextlen > _maxentrysize:
        raise error.RevlogError(
            _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
            % (self.indexfile, rawtextlen))

    node = node or self.hash(rawtext, p1, p2)
    if node in self.nodemap:
        # already stored; nothing more to do
        return node

    if validatehash:
        self.checkhash(rawtext, node, p1=p1, p2=p2)

    return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                               flags, cachedelta=cachedelta,
                               deltacomputer=deltacomputer)
1880
1882
def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                   cachedelta=None, deltacomputer=None):
    """add a raw revision with known flags, node and parents
    useful when reusing a revision not stored in this revlog (ex: received
    over wire, or read from an external bundle).
    """
    # Inline revlogs keep data in the index file; otherwise the data
    # file must be opened for appending as well.
    dfh = None if self._inline else self._datafp("a+")
    ifh = self._indexfp("a+")
    try:
        return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                 flags, cachedelta, ifh, dfh,
                                 deltacomputer=deltacomputer)
    finally:
        # always release the handles, even when _addrevision raises
        if dfh:
            dfh.close()
        ifh.close()
1899
1901
def compress(self, data):
    """Generate a possibly-compressed representation of data."""
    if not data:
        # nothing to store: empty header, empty payload
        return '', data

    compressed = self._compressor.compress(data)
    if compressed:
        # The revlog compressor added the header in the returned data.
        return '', compressed

    # The compressor declined (no gain); store uncompressed. A leading
    # NUL byte already marks raw data, otherwise tag it with 'u'.
    if data[0:1] == '\0':
        return '', data
    return 'u', data
1914
1916
def decompress(self, data):
    """Decompress a revlog chunk.

    The chunk is expected to begin with a header identifying the
    format type so it can be routed to an appropriate decompressor.
    """
    if not data:
        return data

    # Revlogs are read far more often than written, and many chunks
    # decompress in microseconds, so performance matters here. Common,
    # officially supported headers are tested first: zlib ('x'), then
    # raw inline data ('\0'/'u'), before falling back to the generic
    # compression-engine registry.
    t = data[0:1]

    if t == 'x':
        try:
            return _zlibdecompress(data)
        except zlib.error as e:
            raise error.RevlogError(_('revlog decompress error: %s') %
                                    stringutil.forcebytestr(e))
    # '\0' is more common than 'u' so it goes first.
    if t == '\0':
        return data
    if t == 'u':
        return util.buffer(data, 1)

    # Uncommon header: look up (and cache) a decompressor for it.
    try:
        compressor = self._decompressors[t]
    except KeyError:
        try:
            engine = util.compengines.forrevlogheader(t)
            compressor = engine.revlogcompressor()
            self._decompressors[t] = compressor
        except KeyError:
            raise error.RevlogError(_('unknown compression type %r') % t)

    return compressor.decompress(data)
1970
1972
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.

        Returns the node that was added.
        """
        # Reject sentinel nodes that must never appear in storage.
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        # Pick the file handle that holds revision data: the index file for
        # inline revlogs, the separate data file otherwise.
        if self._inline:
            fh = ifh
        else:
            fh = dfh

        # Single-element mutable container; the delta computer may fill in
        # the built text here so we can reuse it for caching below.
        btext = [rawtext]

        curr = len(self)            # rev number the new entry will get
        prev = curr - 1
        offset = self.end(prev)     # byte offset where new data is appended
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # Build and record the new index entry before writing data so that
        # packentry below can serialize it.
        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
        self.index.append(e)
        self.nodemap[node] = curr

        # Reset the pure node cache start lookup offset to account for new
        # revision.
        if self._nodepos is not None:
            self._nodepos = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
                         link, offset)

        # btext[0] may have been populated by the delta computer while
        # searching for a delta base.
        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return node
2047
2049
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        """Append one packed index entry plus its revision data to disk.

        ``entry`` is the serialized index record; ``data`` is a 2-tuple of
        (header, body) chunks for the revision payload.  ``ifh``/``dfh`` are
        the open index/data file handles (``dfh`` is None for inline revlogs).
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1  # rev number of the entry being written
        if not self._inline:
            # Separate data file: register both files with the transaction,
            # then write data first and the index record last.
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            # Inline revlog: index entries and data interleave in one file.
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            # May split the revlog out of inline mode if it grew too large.
            self._enforceinlinesize(transaction, ifh)
2080
2082
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.

        Returns the list of nodes seen (including pre-existing duplicates).
        """

        # _writinghandles doubles as a reentrancy guard: it is non-None only
        # while a group is being written.
        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            # Inline: index and data share one file of size end + isize.
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            # Passed to _peek_iscensored so it can read freshly written data.
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            # Always clear the reentrancy guard, even on error.
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes
2188
2190
2189 def iscensored(self, rev):
2191 def iscensored(self, rev):
2190 """Check if a file revision is censored."""
2192 """Check if a file revision is censored."""
2191 if not self._censorable:
2193 if not self._censorable:
2192 return False
2194 return False
2193
2195
2194 return self.flags(rev) & REVIDX_ISCENSORED
2196 return self.flags(rev) & REVIDX_ISCENSORED
2195
2197
2196 def _peek_iscensored(self, baserev, delta, flush):
2198 def _peek_iscensored(self, baserev, delta, flush):
2197 """Quickly check if a delta produces a censored revision."""
2199 """Quickly check if a delta produces a censored revision."""
2198 if not self._censorable:
2200 if not self._censorable:
2199 return False
2201 return False
2200
2202
2201 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2203 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2202
2204
2203 def getstrippoint(self, minlink):
2205 def getstrippoint(self, minlink):
2204 """find the minimum rev that must be stripped to strip the linkrev
2206 """find the minimum rev that must be stripped to strip the linkrev
2205
2207
2206 Returns a tuple containing the minimum rev and a set of all revs that
2208 Returns a tuple containing the minimum rev and a set of all revs that
2207 have linkrevs that will be broken by this strip.
2209 have linkrevs that will be broken by this strip.
2208 """
2210 """
2209 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2211 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2210 self.headrevs(),
2212 self.headrevs(),
2211 self.linkrev, self.parentrevs)
2213 self.linkrev, self.parentrevs)
2212
2214
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            # Nothing to strip: the strip point is past the tip.
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        # NOTE(review): the slice deliberately stops at -1 — presumably the
        # in-memory index keeps a sentinel entry at the end; confirm against
        # the index implementation before changing.
        del self.index[rev:-1]
        self._nodepos = None
2253
2255
    def checksize(self):
        """Compare on-disk file sizes against what the index implies.

        Returns a tuple ``(dd, di)`` of byte discrepancies: ``dd`` for the
        data file (actual size minus expected size) and ``di`` for the index
        file (bytes beyond a whole number of index records).  Missing files
        (ENOENT) are treated as zero discrepancy; other IOErrors propagate.
        """
        expected = 0
        if len(self):
            # Expected data size is the end offset of the last revision.
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            # Leftover bytes that do not fit a whole index record.
            di = actual - (i * s)
            if self._inline:
                # Inline revlogs interleave data with index entries, so the
                # data check above is meaningless; recompute both numbers
                # from the per-revision chunk lengths instead.
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
2289
2291
2290 def files(self):
2292 def files(self):
2291 res = [self.indexfile]
2293 res = [self.indexfile]
2292 if not self._inline:
2294 if not self._inline:
2293 res.append(self.datafile)
2295 res.append(self.datafile)
2294 return res
2296 return res
2295
2297
    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        """Emit revision data for the requested ``nodes``.

        Validates ``nodesorder``, normalizes it and ``deltamode`` for this
        revlog's capabilities, then delegates the actual emission to
        ``storageutil.emitrevisions`` with this revlog's accessor callbacks.
        """
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        # Without generaldelta, storage order is the only sensible default.
        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        # If we never store delta chains, only full texts can be emitted
        # (unless the caller explicitly asked for deltas against "prev").
        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)
2320
2322
    # Delta-reuse policies accepted by clone()'s ``deltareuse`` argument;
    # see that method's docstring for the semantics of each value.
    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    # Set of all valid policy values, used for argument validation.
    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2328
2330
    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument control how deltas from the existing revlog
        are preserved in the destination revlog. The argument can have the
        following values:

        DELTAREUSEALWAYS
           Deltas will always be reused (if possible), even if the destination
           revlog would not select the same revisions for the delta. This is the
           fastest mode of operation.
        DELTAREUSESAMEREVS
           Deltas will be reused if the destination revlog would pick the same
           revisions for the delta. This mode strikes a balance between speed
           and optimization.
        DELTAREUSENEVER
           Deltas will never be reused. This is the slowest mode of execution.
           This mode can be used to recompute deltas (e.g. if the diff/delta
           algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        # Save the destination's settings so they can be restored afterwards.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                # Without a reusable delta, fall back to the full raw text.
                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)


                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            # Restore the destination revlog's original delta settings even
            # if the copy fails partway through.
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
2445
2447
2446 def censorrevision(self, tr, censornode, tombstone=b''):
2448 def censorrevision(self, tr, censornode, tombstone=b''):
2447 if (self.version & 0xFFFF) == REVLOGV0:
2449 if (self.version & 0xFFFF) == REVLOGV0:
2448 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2450 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2449 self.version)
2451 self.version)
2450
2452
2451 censorrev = self.rev(censornode)
2453 censorrev = self.rev(censornode)
2452 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2454 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2453
2455
2454 if len(tombstone) > self.rawsize(censorrev):
2456 if len(tombstone) > self.rawsize(censorrev):
2455 raise error.Abort(_('censor tombstone must be no longer than '
2457 raise error.Abort(_('censor tombstone must be no longer than '
2456 'censored data'))
2458 'censored data'))
2457
2459
2458 # Rewriting the revlog in place is hard. Our strategy for censoring is
2460 # Rewriting the revlog in place is hard. Our strategy for censoring is
2459 # to create a new revlog, copy all revisions to it, then replace the
2461 # to create a new revlog, copy all revisions to it, then replace the
2460 # revlogs on transaction close.
2462 # revlogs on transaction close.
2461
2463
2462 newindexfile = self.indexfile + b'.tmpcensored'
2464 newindexfile = self.indexfile + b'.tmpcensored'
2463 newdatafile = self.datafile + b'.tmpcensored'
2465 newdatafile = self.datafile + b'.tmpcensored'
2464
2466
2465 # This is a bit dangerous. We could easily have a mismatch of state.
2467 # This is a bit dangerous. We could easily have a mismatch of state.
2466 newrl = revlog(self.opener, newindexfile, newdatafile,
2468 newrl = revlog(self.opener, newindexfile, newdatafile,
2467 censorable=True)
2469 censorable=True)
2468 newrl.version = self.version
2470 newrl.version = self.version
2469 newrl._generaldelta = self._generaldelta
2471 newrl._generaldelta = self._generaldelta
2470 newrl._io = self._io
2472 newrl._io = self._io
2471
2473
2472 for rev in self.revs():
2474 for rev in self.revs():
2473 node = self.node(rev)
2475 node = self.node(rev)
2474 p1, p2 = self.parents(node)
2476 p1, p2 = self.parents(node)
2475
2477
2476 if rev == censorrev:
2478 if rev == censorrev:
2477 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2479 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2478 p1, p2, censornode, REVIDX_ISCENSORED)
2480 p1, p2, censornode, REVIDX_ISCENSORED)
2479
2481
2480 if newrl.deltaparent(rev) != nullrev:
2482 if newrl.deltaparent(rev) != nullrev:
2481 raise error.Abort(_('censored revision stored as delta; '
2483 raise error.Abort(_('censored revision stored as delta; '
2482 'cannot censor'),
2484 'cannot censor'),
2483 hint=_('censoring of revlogs is not '
2485 hint=_('censoring of revlogs is not '
2484 'fully implemented; please report '
2486 'fully implemented; please report '
2485 'this bug'))
2487 'this bug'))
2486 continue
2488 continue
2487
2489
2488 if self.iscensored(rev):
2490 if self.iscensored(rev):
2489 if self.deltaparent(rev) != nullrev:
2491 if self.deltaparent(rev) != nullrev:
2490 raise error.Abort(_('cannot censor due to censored '
2492 raise error.Abort(_('cannot censor due to censored '
2491 'revision having delta stored'))
2493 'revision having delta stored'))
2492 rawtext = self._chunk(rev)
2494 rawtext = self._chunk(rev)
2493 else:
2495 else:
2494 rawtext = self.revision(rev, raw=True)
2496 rawtext = self.revision(rev, raw=True)
2495
2497
2496 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2498 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2497 self.flags(rev))
2499 self.flags(rev))
2498
2500
2499 tr.addbackup(self.indexfile, location='store')
2501 tr.addbackup(self.indexfile, location='store')
2500 if not self._inline:
2502 if not self._inline:
2501 tr.addbackup(self.datafile, location='store')
2503 tr.addbackup(self.datafile, location='store')
2502
2504
2503 self.opener.rename(newrl.indexfile, self.indexfile)
2505 self.opener.rename(newrl.indexfile, self.indexfile)
2504 if not self._inline:
2506 if not self._inline:
2505 self.opener.rename(newrl.datafile, self.datafile)
2507 self.opener.rename(newrl.datafile, self.datafile)
2506
2508
2507 self.clearcaches()
2509 self.clearcaches()
2508 self._loadindex()
2510 self._loadindex()
2509
2511
2510 def verifyintegrity(self, state):
2512 def verifyintegrity(self, state):
2511 """Verifies the integrity of the revlog.
2513 """Verifies the integrity of the revlog.
2512
2514
2513 Yields ``revlogproblem`` instances describing problems that are
2515 Yields ``revlogproblem`` instances describing problems that are
2514 found.
2516 found.
2515 """
2517 """
2516 dd, di = self.checksize()
2518 dd, di = self.checksize()
2517 if dd:
2519 if dd:
2518 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2520 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2519 if di:
2521 if di:
2520 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2522 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2521
2523
2522 version = self.version & 0xFFFF
2524 version = self.version & 0xFFFF
2523
2525
2524 # The verifier tells us what version revlog we should be.
2526 # The verifier tells us what version revlog we should be.
2525 if version != state['expectedversion']:
2527 if version != state['expectedversion']:
2526 yield revlogproblem(
2528 yield revlogproblem(
2527 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2529 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2528 (self.indexfile, version, state['expectedversion']))
2530 (self.indexfile, version, state['expectedversion']))
2529
2531
2530 state['skipread'] = set()
2532 state['skipread'] = set()
2531
2533
2532 for rev in self:
2534 for rev in self:
2533 node = self.node(rev)
2535 node = self.node(rev)
2534
2536
2535 # Verify contents. 4 cases to care about:
2537 # Verify contents. 4 cases to care about:
2536 #
2538 #
2537 # common: the most common case
2539 # common: the most common case
2538 # rename: with a rename
2540 # rename: with a rename
2539 # meta: file content starts with b'\1\n', the metadata
2541 # meta: file content starts with b'\1\n', the metadata
2540 # header defined in filelog.py, but without a rename
2542 # header defined in filelog.py, but without a rename
2541 # ext: content stored externally
2543 # ext: content stored externally
2542 #
2544 #
2543 # More formally, their differences are shown below:
2545 # More formally, their differences are shown below:
2544 #
2546 #
2545 # | common | rename | meta | ext
2547 # | common | rename | meta | ext
2546 # -------------------------------------------------------
2548 # -------------------------------------------------------
2547 # flags() | 0 | 0 | 0 | not 0
2549 # flags() | 0 | 0 | 0 | not 0
2548 # renamed() | False | True | False | ?
2550 # renamed() | False | True | False | ?
2549 # rawtext[0:2]=='\1\n'| False | True | True | ?
2551 # rawtext[0:2]=='\1\n'| False | True | True | ?
2550 #
2552 #
2551 # "rawtext" means the raw text stored in revlog data, which
2553 # "rawtext" means the raw text stored in revlog data, which
2552 # could be retrieved by "revision(rev, raw=True)". "text"
2554 # could be retrieved by "revision(rev, raw=True)". "text"
2553 # mentioned below is "revision(rev, raw=False)".
2555 # mentioned below is "revision(rev, raw=False)".
2554 #
2556 #
2555 # There are 3 different lengths stored physically:
2557 # There are 3 different lengths stored physically:
2556 # 1. L1: rawsize, stored in revlog index
2558 # 1. L1: rawsize, stored in revlog index
2557 # 2. L2: len(rawtext), stored in revlog data
2559 # 2. L2: len(rawtext), stored in revlog data
2558 # 3. L3: len(text), stored in revlog data if flags==0, or
2560 # 3. L3: len(text), stored in revlog data if flags==0, or
2559 # possibly somewhere else if flags!=0
2561 # possibly somewhere else if flags!=0
2560 #
2562 #
2561 # L1 should be equal to L2. L3 could be different from them.
2563 # L1 should be equal to L2. L3 could be different from them.
2562 # "text" may or may not affect commit hash depending on flag
2564 # "text" may or may not affect commit hash depending on flag
2563 # processors (see revlog.addflagprocessor).
2565 # processors (see revlog.addflagprocessor).
2564 #
2566 #
2565 # | common | rename | meta | ext
2567 # | common | rename | meta | ext
2566 # -------------------------------------------------
2568 # -------------------------------------------------
2567 # rawsize() | L1 | L1 | L1 | L1
2569 # rawsize() | L1 | L1 | L1 | L1
2568 # size() | L1 | L2-LM | L1(*) | L1 (?)
2570 # size() | L1 | L2-LM | L1(*) | L1 (?)
2569 # len(rawtext) | L2 | L2 | L2 | L2
2571 # len(rawtext) | L2 | L2 | L2 | L2
2570 # len(text) | L2 | L2 | L2 | L3
2572 # len(text) | L2 | L2 | L2 | L3
2571 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2573 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2572 #
2574 #
2573 # LM: length of metadata, depending on rawtext
2575 # LM: length of metadata, depending on rawtext
2574 # (*): not ideal, see comment in filelog.size
2576 # (*): not ideal, see comment in filelog.size
2575 # (?): could be "- len(meta)" if the resolved content has
2577 # (?): could be "- len(meta)" if the resolved content has
2576 # rename metadata
2578 # rename metadata
2577 #
2579 #
2578 # Checks needed to be done:
2580 # Checks needed to be done:
2579 # 1. length check: L1 == L2, in all cases.
2581 # 1. length check: L1 == L2, in all cases.
2580 # 2. hash check: depending on flag processor, we may need to
2582 # 2. hash check: depending on flag processor, we may need to
2581 # use either "text" (external), or "rawtext" (in revlog).
2583 # use either "text" (external), or "rawtext" (in revlog).
2582
2584
2583 try:
2585 try:
2584 skipflags = state.get('skipflags', 0)
2586 skipflags = state.get('skipflags', 0)
2585 if skipflags:
2587 if skipflags:
2586 skipflags &= self.flags(rev)
2588 skipflags &= self.flags(rev)
2587
2589
2588 if skipflags:
2590 if skipflags:
2589 state['skipread'].add(node)
2591 state['skipread'].add(node)
2590 else:
2592 else:
2591 # Side-effect: read content and verify hash.
2593 # Side-effect: read content and verify hash.
2592 self.revision(node)
2594 self.revision(node)
2593
2595
2594 l1 = self.rawsize(rev)
2596 l1 = self.rawsize(rev)
2595 l2 = len(self.revision(node, raw=True))
2597 l2 = len(self.revision(node, raw=True))
2596
2598
2597 if l1 != l2:
2599 if l1 != l2:
2598 yield revlogproblem(
2600 yield revlogproblem(
2599 error=_('unpacked size is %d, %d expected') % (l2, l1),
2601 error=_('unpacked size is %d, %d expected') % (l2, l1),
2600 node=node)
2602 node=node)
2601
2603
2602 except error.CensoredNodeError:
2604 except error.CensoredNodeError:
2603 if state['erroroncensored']:
2605 if state['erroroncensored']:
2604 yield revlogproblem(error=_('censored file data'),
2606 yield revlogproblem(error=_('censored file data'),
2605 node=node)
2607 node=node)
2606 state['skipread'].add(node)
2608 state['skipread'].add(node)
2607 except Exception as e:
2609 except Exception as e:
2608 yield revlogproblem(
2610 yield revlogproblem(
2609 error=_('unpacking %s: %s') % (short(node),
2611 error=_('unpacking %s: %s') % (short(node),
2610 stringutil.forcebytestr(e)),
2612 stringutil.forcebytestr(e)),
2611 node=node)
2613 node=node)
2612 state['skipread'].add(node)
2614 state['skipread'].add(node)
2613
2615
2614 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2616 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2615 revisionscount=False, trackedsize=False,
2617 revisionscount=False, trackedsize=False,
2616 storedsize=False):
2618 storedsize=False):
2617 d = {}
2619 d = {}
2618
2620
2619 if exclusivefiles:
2621 if exclusivefiles:
2620 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2622 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2621 if not self._inline:
2623 if not self._inline:
2622 d['exclusivefiles'].append((self.opener, self.datafile))
2624 d['exclusivefiles'].append((self.opener, self.datafile))
2623
2625
2624 if sharedfiles:
2626 if sharedfiles:
2625 d['sharedfiles'] = []
2627 d['sharedfiles'] = []
2626
2628
2627 if revisionscount:
2629 if revisionscount:
2628 d['revisionscount'] = len(self)
2630 d['revisionscount'] = len(self)
2629
2631
2630 if trackedsize:
2632 if trackedsize:
2631 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2633 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2632
2634
2633 if storedsize:
2635 if storedsize:
2634 d['storedsize'] = sum(self.opener.stat(path).st_size
2636 d['storedsize'] = sum(self.opener.stat(path).st_size
2635 for path in self.files())
2637 for path in self.files())
2636
2638
2637 return d
2639 return d
General Comments 0
You need to be logged in to leave comments. Login now