revlog: rename 'self.checkinlinesize' into '_enforceinlinesize'...
Boris Feld
r35992:9ba1d0c7 default
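The change itself is a pure rename: `checkinlinesize` becomes `_enforceinlinesize`, the leading underscore marking it as internal to the revlog layer. The method's job is unchanged: once an inline revlog (revision data interleaved into the `.i` index file) grows past `_maxinline` (131072 bytes), the data must be split out into a separate `.d` file. A minimal sketch of that contract, assuming a revlog-like object `rl` with an `_inline` flag and a hypothetical `_split()` helper (not the actual Mercurial implementation):

    def _enforceinlinesize(rl, tr):
        # inline revlogs keep revision data inside the .i index file;
        # past the threshold they must be split into .i + .d
        if rl._inline and rl.end(len(rl) - 1) > 131072:  # revlog._maxinline
            rl._split(tr)  # hypothetical helper; tr tracks the rewrite

The underscore signals that the method is meant to be called only from within revlog and its subclasses, as the changelog override in the diff below does.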
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,565 +1,565 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import (
    attr,
)

from . import (
    encoding,
    error,
    revlog,
    util,
)

_defaultextra = {'branch': 'default'}

def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)sab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x00ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == util.unescapestr(res)
    True
    """
    # subset of the string_escape codec
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split('\0'):
        if l:
            if '\\0' in l:
                # fix up \0 without getting into trouble with \\0
                l = l.replace('\\\\', '\\\\\n')
                l = l.replace('\\0', '\0')
                l = l.replace('\n', '')
            k, v = util.unescapestr(l).split(':', 1)
            extra[k] = v
    return extra

def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape('%s:%s' % (k, d[k])) for k in sorted(d)]
    return "\0".join(items)

def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''
    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end
    def tell(self):
        return self.offset
    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)

def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""
    def _divert(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + ".a", mode)
    return _divert

def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""
    def _delay(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)
    return _delay

@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default='')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    description = attr.ib(default='')

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        u'_offsets',
        u'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1:nl3 + 2] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

    @property
    def manifest(self):
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2:])

class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hooks processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

        datafile = '00changelog.d'
        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
                               checkambig=True, mmaplargeindex=True)

        if self._initempty:
            # changelogs don't benefit from generaldelta
            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each. So disable delta
        # chains.
        self.storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tiprev(self):
        for i in xrange(len(self) -1, -2, -1):
            if i not in self.filteredrevs:
                return i

    def tip(self):
        """filtered version of revlog.tip"""
        return self.node(self.tiprev())

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

    @util.propertycache
    def nodemap(self):
        # XXX need filtering too
        self.rev(self.node(0))
        return self._nodecache

    def reachableroots(self, minroot, heads, roots, includepath=False):
        return self.index.reachableroots2(minroot, heads, roots, includepath)

    def headrevs(self):
        if self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        return super(changelog, self).headrevs()

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
-        self.checkinlinesize(tr)
+        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, "w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write("".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

-    def checkinlinesize(self, tr, fp=None):
+    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
-            revlog.revlog.checkinlinesize(self, tr, fp)
+            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.RevlogError(_("empty username"))
        if "\n" in user:
            raise error.RevlogError(_("username %s contains a newline")
                                    % repr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % util.parsedate(date)
        else:
            parseddate = "%d %d" % util.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.RevlogError(_('the name \'%s\' is reserved')
                                        % branch)
        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sorted(files) + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _addrevision(self, node, rawtext, transaction, *args, **kwargs):
        # overlay over the standard revlog._addrevision to track the new
        # revision on the transaction.
        rev = len(self)
        node = super(changelog, self)._addrevision(node, rawtext, transaction,
                                                   *args, **kwargs)
        revs = transaction.changes.get('revs')
        if revs is not None:
            if revs:
                assert revs[-1] + 1 == rev
                revs = xrange(revs[0], rev + 1)
            else:
                revs = xrange(rev, rev + 1)
            transaction.changes['revs'] = revs
        return node
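Taken together, `delayupdate`, `_writepending` and `_finalize` above implement a small protocol that keeps new changelog entries invisible to concurrent readers until the transaction commits: on an empty repository writes are diverted to `00changelog.i.a`, otherwise they are buffered in memory, and `_finalize` renames or appends the data into `00changelog.i` before enforcing the inline-size limit. A condensed sketch of that life cycle (the `tr` and `opener` objects here are stand-ins, not runnable against a real repository):

    cl = changelog(opener)
    cl.delayupdate(tr)       # index writes now go to a buffer or to .i.a
    cl.add(manifest, files, desc, tr, p1, p2, user)  # invisible to readers
    tr.writepending()        # _writepending dumps the buffer to 00changelog.i.a,
                             # so pretxn hooks can read it via trypending=True
    tr.close()               # _finalize moves the data into 00changelog.i and
                             # then calls self._enforceinlinesize(tr)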
@@ -1,2501 +1,2501 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 from __future__ import absolute_import
14 from __future__ import absolute_import
15
15
16 import binascii
16 import binascii
17 import collections
17 import collections
18 import contextlib
18 import contextlib
19 import errno
19 import errno
20 import hashlib
20 import hashlib
21 import heapq
21 import heapq
22 import os
22 import os
23 import struct
23 import struct
24 import zlib
24 import zlib
25
25
26 # import stuff from node for others to import from revlog
26 # import stuff from node for others to import from revlog
27 from .node import (
27 from .node import (
28 bin,
28 bin,
29 hex,
29 hex,
30 nullid,
30 nullid,
31 nullrev,
31 nullrev,
32 wdirhex,
32 wdirhex,
33 wdirid,
33 wdirid,
34 wdirrev,
34 wdirrev,
35 )
35 )
36 from .i18n import _
36 from .i18n import _
37 from .thirdparty import (
37 from .thirdparty import (
38 attr,
38 attr,
39 )
39 )
40 from . import (
40 from . import (
41 ancestor,
41 ancestor,
42 error,
42 error,
43 mdiff,
43 mdiff,
44 policy,
44 policy,
45 pycompat,
45 pycompat,
46 templatefilters,
46 templatefilters,
47 util,
47 util,
48 )
48 )
49
49
50 parsers = policy.importmod(r'parsers')
50 parsers = policy.importmod(r'parsers')
51
51
52 # Aliased for performance.
52 # Aliased for performance.
53 _zlibdecompress = zlib.decompress
53 _zlibdecompress = zlib.decompress
54
54
55 # revlog header flags
55 # revlog header flags
56 REVLOGV0 = 0
56 REVLOGV0 = 0
57 REVLOGV1 = 1
57 REVLOGV1 = 1
58 # Dummy value until file format is finalized.
58 # Dummy value until file format is finalized.
59 # Reminder: change the bounds check in revlog.__init__ when this is changed.
59 # Reminder: change the bounds check in revlog.__init__ when this is changed.
60 REVLOGV2 = 0xDEAD
60 REVLOGV2 = 0xDEAD
61 FLAG_INLINE_DATA = (1 << 16)
61 FLAG_INLINE_DATA = (1 << 16)
62 FLAG_GENERALDELTA = (1 << 17)
62 FLAG_GENERALDELTA = (1 << 17)
63 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
63 REVLOG_DEFAULT_FLAGS = FLAG_INLINE_DATA
64 REVLOG_DEFAULT_FORMAT = REVLOGV1
64 REVLOG_DEFAULT_FORMAT = REVLOGV1
65 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
65 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
66 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
66 REVLOGV1_FLAGS = FLAG_INLINE_DATA | FLAG_GENERALDELTA
67 REVLOGV2_FLAGS = REVLOGV1_FLAGS
67 REVLOGV2_FLAGS = REVLOGV1_FLAGS
68
68
69 # revlog index flags
69 # revlog index flags
70 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
70 REVIDX_ISCENSORED = (1 << 15) # revision has censor metadata, must be verified
71 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
71 REVIDX_ELLIPSIS = (1 << 14) # revision hash does not match data (narrowhg)
72 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
72 REVIDX_EXTSTORED = (1 << 13) # revision data is stored externally
73 REVIDX_DEFAULT_FLAGS = 0
73 REVIDX_DEFAULT_FLAGS = 0
74 # stable order in which flags need to be processed and their processors applied
74 # stable order in which flags need to be processed and their processors applied
75 REVIDX_FLAGS_ORDER = [
75 REVIDX_FLAGS_ORDER = [
76 REVIDX_ISCENSORED,
76 REVIDX_ISCENSORED,
77 REVIDX_ELLIPSIS,
77 REVIDX_ELLIPSIS,
78 REVIDX_EXTSTORED,
78 REVIDX_EXTSTORED,
79 ]
79 ]
80 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
80 REVIDX_KNOWN_FLAGS = util.bitsfrom(REVIDX_FLAGS_ORDER)
81
81
82 # max size of revlog with inline data
82 # max size of revlog with inline data
83 _maxinline = 131072
83 _maxinline = 131072
84 _chunksize = 1048576
84 _chunksize = 1048576
85
85
86 RevlogError = error.RevlogError
86 RevlogError = error.RevlogError
87 LookupError = error.LookupError
87 LookupError = error.LookupError
88 CensoredNodeError = error.CensoredNodeError
88 CensoredNodeError = error.CensoredNodeError
89 ProgrammingError = error.ProgrammingError
89 ProgrammingError = error.ProgrammingError
90
90
91 # Store flag processors (cf. 'addflagprocessor()' to register)
91 # Store flag processors (cf. 'addflagprocessor()' to register)
92 _flagprocessors = {
92 _flagprocessors = {
93 REVIDX_ISCENSORED: None,
93 REVIDX_ISCENSORED: None,
94 }
94 }
95
95
96 def addflagprocessor(flag, processor):
96 def addflagprocessor(flag, processor):
97 """Register a flag processor on a revision data flag.
97 """Register a flag processor on a revision data flag.
98
98
99 Invariant:
99 Invariant:
100 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
100 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER.
101 - Only one flag processor can be registered on a specific flag.
101 - Only one flag processor can be registered on a specific flag.
102 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
102 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
103 following signatures:
103 following signatures:
104 - (read) f(self, rawtext) -> text, bool
104 - (read) f(self, rawtext) -> text, bool
105 - (write) f(self, text) -> rawtext, bool
105 - (write) f(self, text) -> rawtext, bool
106 - (raw) f(self, rawtext) -> bool
106 - (raw) f(self, rawtext) -> bool
107 "text" is presented to the user. "rawtext" is stored in revlog data, not
107 "text" is presented to the user. "rawtext" is stored in revlog data, not
108 directly visible to the user.
108 directly visible to the user.
109 The boolean returned by these transforms is used to determine whether
109 The boolean returned by these transforms is used to determine whether
110 the returned text can be used for hash integrity checking. For example,
110 the returned text can be used for hash integrity checking. For example,
111 if "write" returns False, then "text" is used to generate hash. If
111 if "write" returns False, then "text" is used to generate hash. If
112 "write" returns True, that basically means "rawtext" returned by "write"
112 "write" returns True, that basically means "rawtext" returned by "write"
113 should be used to generate hash. Usually, "write" and "read" return
113 should be used to generate hash. Usually, "write" and "read" return
114 different booleans. And "raw" returns a same boolean as "write".
114 different booleans. And "raw" returns a same boolean as "write".
115
115
116 Note: The 'raw' transform is used for changegroup generation and in some
116 Note: The 'raw' transform is used for changegroup generation and in some
117 debug commands. In this case the transform only indicates whether the
117 debug commands. In this case the transform only indicates whether the
118 contents can be used for hash integrity checks.
118 contents can be used for hash integrity checks.
119 """
119 """
120 if not flag & REVIDX_KNOWN_FLAGS:
120 if not flag & REVIDX_KNOWN_FLAGS:
121 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
121 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
122 raise ProgrammingError(msg)
122 raise ProgrammingError(msg)
123 if flag not in REVIDX_FLAGS_ORDER:
123 if flag not in REVIDX_FLAGS_ORDER:
124 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
124 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
125 raise ProgrammingError(msg)
125 raise ProgrammingError(msg)
126 if flag in _flagprocessors:
126 if flag in _flagprocessors:
127 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
127 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
128 raise error.Abort(msg)
128 raise error.Abort(msg)
129 _flagprocessors[flag] = processor
129 _flagprocessors[flag] = processor
130
130
131 def getoffset(q):
131 def getoffset(q):
132 return int(q >> 16)
132 return int(q >> 16)
133
133
134 def gettype(q):
134 def gettype(q):
135 return int(q & 0xFFFF)
135 return int(q & 0xFFFF)
136
136
137 def offset_type(offset, type):
137 def offset_type(offset, type):
138 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
138 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
139 raise ValueError('unknown revlog index flags')
139 raise ValueError('unknown revlog index flags')
140 return int(int(offset) << 16 | type)
140 return int(int(offset) << 16 | type)
141
141
142 _nullhash = hashlib.sha1(nullid)
142 _nullhash = hashlib.sha1(nullid)
143
143
144 def hash(text, p1, p2):
144 def hash(text, p1, p2):
145 """generate a hash from the given text and its parent hashes
145 """generate a hash from the given text and its parent hashes
146
146
147 This hash combines both the current file contents and its history
147 This hash combines both the current file contents and its history
148 in a manner that makes it easy to distinguish nodes with the same
148 in a manner that makes it easy to distinguish nodes with the same
149 content in the revision graph.
149 content in the revision graph.
150 """
150 """
151 # As of now, if one of the parent node is null, p2 is null
151 # As of now, if one of the parent node is null, p2 is null
152 if p2 == nullid:
152 if p2 == nullid:
153 # deep copy of a hash is faster than creating one
153 # deep copy of a hash is faster than creating one
154 s = _nullhash.copy()
154 s = _nullhash.copy()
155 s.update(p1)
155 s.update(p1)
156 else:
156 else:
157 # none of the parent nodes are nullid
157 # none of the parent nodes are nullid
158 if p1 < p2:
158 if p1 < p2:
159 a = p1
159 a = p1
160 b = p2
160 b = p2
161 else:
161 else:
162 a = p2
162 a = p2
163 b = p1
163 b = p1
164 s = hashlib.sha1(a)
164 s = hashlib.sha1(a)
165 s.update(b)
165 s.update(b)
166 s.update(text)
166 s.update(text)
167 return s.digest()
167 return s.digest()
168
168
169 def _trimchunk(revlog, revs, startidx, endidx=None):
169 def _trimchunk(revlog, revs, startidx, endidx=None):
170 """returns revs[startidx:endidx] without empty trailing revs
170 """returns revs[startidx:endidx] without empty trailing revs
171 """
171 """
172 length = revlog.length
172 length = revlog.length
173
173
174 if endidx is None:
174 if endidx is None:
175 endidx = len(revs)
175 endidx = len(revs)
176
176
177 # Trim empty revs at the end, but never the very first revision of a chain
177 # Trim empty revs at the end, but never the very first revision of a chain
178 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
178 while endidx > 1 and endidx > startidx and length(revs[endidx - 1]) == 0:
179 endidx -= 1
179 endidx -= 1
180
180
181 return revs[startidx:endidx]
181 return revs[startidx:endidx]
182
182
183 def _slicechunk(revlog, revs):
183 def _slicechunk(revlog, revs):
184 """slice revs to reduce the amount of unrelated data to be read from disk.
184 """slice revs to reduce the amount of unrelated data to be read from disk.
185
185
186 ``revs`` is sliced into groups that should be read in one time.
186 ``revs`` is sliced into groups that should be read in one time.
187 Assume that revs are sorted.
187 Assume that revs are sorted.
188 """
188 """
189 start = revlog.start
189 start = revlog.start
190 length = revlog.length
190 length = revlog.length
191
191
192 if len(revs) <= 1:
192 if len(revs) <= 1:
193 yield revs
193 yield revs
194 return
194 return
195
195
196 startbyte = start(revs[0])
196 startbyte = start(revs[0])
197 endbyte = start(revs[-1]) + length(revs[-1])
197 endbyte = start(revs[-1]) + length(revs[-1])
198 readdata = deltachainspan = endbyte - startbyte
198 readdata = deltachainspan = endbyte - startbyte
199
199
200 chainpayload = sum(length(r) for r in revs)
200 chainpayload = sum(length(r) for r in revs)
201
201
202 if deltachainspan:
202 if deltachainspan:
203 density = chainpayload / float(deltachainspan)
203 density = chainpayload / float(deltachainspan)
204 else:
204 else:
205 density = 1.0
205 density = 1.0
206
206
207 # Store the gaps in a heap to have them sorted by decreasing size
207 # Store the gaps in a heap to have them sorted by decreasing size
208 gapsheap = []
208 gapsheap = []
209 heapq.heapify(gapsheap)
209 heapq.heapify(gapsheap)
210 prevend = None
210 prevend = None
211 for i, rev in enumerate(revs):
211 for i, rev in enumerate(revs):
212 revstart = start(rev)
212 revstart = start(rev)
213 revlen = length(rev)
213 revlen = length(rev)
214
214
215 # Skip empty revisions to form larger holes
215 # Skip empty revisions to form larger holes
216 if revlen == 0:
216 if revlen == 0:
217 continue
217 continue
218
218
219 if prevend is not None:
219 if prevend is not None:
220 gapsize = revstart - prevend
220 gapsize = revstart - prevend
221 # only consider holes that are large enough
221 # only consider holes that are large enough
222 if gapsize > revlog._srmingapsize:
222 if gapsize > revlog._srmingapsize:
223 heapq.heappush(gapsheap, (-gapsize, i))
223 heapq.heappush(gapsheap, (-gapsize, i))
224
224
225 prevend = revstart + revlen
225 prevend = revstart + revlen
226
226
227 # Collect the indices of the largest holes until the density is acceptable
227 # Collect the indices of the largest holes until the density is acceptable
228 indicesheap = []
228 indicesheap = []
229 heapq.heapify(indicesheap)
229 heapq.heapify(indicesheap)
230 while gapsheap and density < revlog._srdensitythreshold:
230 while gapsheap and density < revlog._srdensitythreshold:
231 oppgapsize, gapidx = heapq.heappop(gapsheap)
231 oppgapsize, gapidx = heapq.heappop(gapsheap)
232
232
233 heapq.heappush(indicesheap, gapidx)
233 heapq.heappush(indicesheap, gapidx)
234
234
235 # the gap sizes are stored as negatives to be sorted decreasingly
235 # the gap sizes are stored as negatives to be sorted decreasingly
236 # by the heap
236 # by the heap
237 readdata -= (-oppgapsize)
237 readdata -= (-oppgapsize)
238 if readdata > 0:
238 if readdata > 0:
239 density = chainpayload / float(readdata)
239 density = chainpayload / float(readdata)
240 else:
240 else:
241 density = 1.0
241 density = 1.0
242
242
243 # Cut the revs at collected indices
243 # Cut the revs at collected indices
244 previdx = 0
244 previdx = 0
245 while indicesheap:
245 while indicesheap:
246 idx = heapq.heappop(indicesheap)
246 idx = heapq.heappop(indicesheap)
247
247
248 chunk = _trimchunk(revlog, revs, previdx, idx)
248 chunk = _trimchunk(revlog, revs, previdx, idx)
249 if chunk:
249 if chunk:
250 yield chunk
250 yield chunk
251
251
252 previdx = idx
252 previdx = idx
253
253
254 chunk = _trimchunk(revlog, revs, previdx)
254 chunk = _trimchunk(revlog, revs, previdx)
255 if chunk:
255 if chunk:
256 yield chunk
256 yield chunk
257
257
258 @attr.s(slots=True, frozen=True)
258 @attr.s(slots=True, frozen=True)
259 class _deltainfo(object):
259 class _deltainfo(object):
260 distance = attr.ib()
260 distance = attr.ib()
261 deltalen = attr.ib()
261 deltalen = attr.ib()
262 data = attr.ib()
262 data = attr.ib()
263 base = attr.ib()
263 base = attr.ib()
264 chainbase = attr.ib()
264 chainbase = attr.ib()
265 chainlen = attr.ib()
265 chainlen = attr.ib()
266 compresseddeltalen = attr.ib()
266 compresseddeltalen = attr.ib()
267
267
268 class _deltacomputer(object):
268 class _deltacomputer(object):
269 def __init__(self, revlog):
269 def __init__(self, revlog):
270 self.revlog = revlog
270 self.revlog = revlog
271
271
272 def _getcandidaterevs(self, p1, p2, cachedelta):
272 def _getcandidaterevs(self, p1, p2, cachedelta):
273 """
273 """
274 Provides revisions that present an interest to be diffed against,
274 Provides revisions that present an interest to be diffed against,
275 grouped by level of easiness.
275 grouped by level of easiness.
276 """
276 """
277 revlog = self.revlog
277 revlog = self.revlog
278 curr = len(revlog)
278 curr = len(revlog)
279 prev = curr - 1
279 prev = curr - 1
280 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
280 p1r, p2r = revlog.rev(p1), revlog.rev(p2)
281
281
282 # should we try to build a delta?
282 # should we try to build a delta?
283 if prev != nullrev and revlog.storedeltachains:
283 if prev != nullrev and revlog.storedeltachains:
284 tested = set()
284 tested = set()
285 # This condition is true most of the time when processing
285 # This condition is true most of the time when processing
286 # changegroup data into a generaldelta repo. The only time it
286 # changegroup data into a generaldelta repo. The only time it
287 # isn't true is if this is the first revision in a delta chain
287 # isn't true is if this is the first revision in a delta chain
288 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
288 # or if ``format.generaldelta=true`` disabled ``lazydeltabase``.
289 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
289 if cachedelta and revlog._generaldelta and revlog._lazydeltabase:
290 # Assume what we received from the server is a good choice
290 # Assume what we received from the server is a good choice
291 # build delta will reuse the cache
291 # build delta will reuse the cache
292 yield (cachedelta[0],)
292 yield (cachedelta[0],)
293 tested.add(cachedelta[0])
293 tested.add(cachedelta[0])
294
294
295 if revlog._generaldelta:
295 if revlog._generaldelta:
296 # exclude already lazy tested base if any
296 # exclude already lazy tested base if any
297 parents = [p for p in (p1r, p2r)
297 parents = [p for p in (p1r, p2r)
298 if p != nullrev and p not in tested]
298 if p != nullrev and p not in tested]
299 if parents and not revlog._aggressivemergedeltas:
299 if parents and not revlog._aggressivemergedeltas:
300 # Pick whichever parent is closer to us (to minimize the
300 # Pick whichever parent is closer to us (to minimize the
301 # chance of having to build a fulltext).
301 # chance of having to build a fulltext).
302 parents = [max(parents)]
302 parents = [max(parents)]
303 tested.update(parents)
303 tested.update(parents)
304 yield parents
304 yield parents
305
305
306 if prev not in tested:
306 if prev not in tested:
307 # other approach failed try against prev to hopefully save us a
307 # other approach failed try against prev to hopefully save us a
308 # fulltext.
308 # fulltext.
309 yield (prev,)
309 yield (prev,)
310
310
311 def buildtext(self, revinfo, fh):
311 def buildtext(self, revinfo, fh):
312 """Builds a fulltext version of a revision
312 """Builds a fulltext version of a revision
313
313
314 revinfo: _revisioninfo instance that contains all needed info
314 revinfo: _revisioninfo instance that contains all needed info
315 fh: file handle to either the .i or the .d revlog file,
315 fh: file handle to either the .i or the .d revlog file,
316 depending on whether it is inlined or not
316 depending on whether it is inlined or not
317 """
317 """
318 btext = revinfo.btext
318 btext = revinfo.btext
319 if btext[0] is not None:
319 if btext[0] is not None:
320 return btext[0]
320 return btext[0]
321
321
322 revlog = self.revlog
322 revlog = self.revlog
323 cachedelta = revinfo.cachedelta
323 cachedelta = revinfo.cachedelta
324 flags = revinfo.flags
324 flags = revinfo.flags
325 node = revinfo.node
325 node = revinfo.node
326
326
327 baserev = cachedelta[0]
327 baserev = cachedelta[0]
328 delta = cachedelta[1]
328 delta = cachedelta[1]
329 # special case deltas which replace entire base; no need to decode
329 # special case deltas which replace entire base; no need to decode
330 # base revision. this neatly avoids censored bases, which throw when
330 # base revision. this neatly avoids censored bases, which throw when
331 # they're decoded.
331 # they're decoded.
332 hlen = struct.calcsize(">lll")
332 hlen = struct.calcsize(">lll")
333 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
333 if delta[:hlen] == mdiff.replacediffheader(revlog.rawsize(baserev),
334 len(delta) - hlen):
334 len(delta) - hlen):
335 btext[0] = delta[hlen:]
335 btext[0] = delta[hlen:]
336 else:
336 else:
337 basetext = revlog.revision(baserev, _df=fh, raw=True)
337 basetext = revlog.revision(baserev, _df=fh, raw=True)
338 btext[0] = mdiff.patch(basetext, delta)
338 btext[0] = mdiff.patch(basetext, delta)
339
339
340 try:
340 try:
341 res = revlog._processflags(btext[0], flags, 'read', raw=True)
341 res = revlog._processflags(btext[0], flags, 'read', raw=True)
342 btext[0], validatehash = res
342 btext[0], validatehash = res
343 if validatehash:
343 if validatehash:
344 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
344 revlog.checkhash(btext[0], node, p1=revinfo.p1, p2=revinfo.p2)
345 if flags & REVIDX_ISCENSORED:
345 if flags & REVIDX_ISCENSORED:
346 raise RevlogError(_('node %s is not censored') % node)
346 raise RevlogError(_('node %s is not censored') % node)
347 except CensoredNodeError:
347 except CensoredNodeError:
348 # must pass the censored index flag to add censored revisions
348 # must pass the censored index flag to add censored revisions
349 if not flags & REVIDX_ISCENSORED:
349 if not flags & REVIDX_ISCENSORED:
350 raise
350 raise
351 return btext[0]
351 return btext[0]
352
352
353 def _builddeltadiff(self, base, revinfo, fh):
353 def _builddeltadiff(self, base, revinfo, fh):
354 revlog = self.revlog
354 revlog = self.revlog
355 t = self.buildtext(revinfo, fh)
355 t = self.buildtext(revinfo, fh)
356 if revlog.iscensored(base):
356 if revlog.iscensored(base):
357 # deltas based on a censored revision must replace the
357 # deltas based on a censored revision must replace the
358 # full content in one patch, so delta works everywhere
358 # full content in one patch, so delta works everywhere
359 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
359 header = mdiff.replacediffheader(revlog.rawsize(base), len(t))
360 delta = header + t
360 delta = header + t
361 else:
361 else:
362 ptext = revlog.revision(base, _df=fh, raw=True)
362 ptext = revlog.revision(base, _df=fh, raw=True)
363 delta = mdiff.textdiff(ptext, t)
363 delta = mdiff.textdiff(ptext, t)
364
364
365 return delta
365 return delta
366
366
    def _builddeltainfo(self, revinfo, base, fh):
        # can we use the cached delta?
        if revinfo.cachedelta and revinfo.cachedelta[0] == base:
            delta = revinfo.cachedelta[1]
        else:
            delta = self._builddeltadiff(base, revinfo, fh)
        revlog = self.revlog
        header, data = revlog.compress(delta)
        deltalen = len(header) + len(data)
        chainbase = revlog.chainbase(base)
        offset = revlog.end(len(revlog) - 1)
        dist = deltalen + offset - revlog.start(chainbase)
        if revlog._generaldelta:
            deltabase = base
        else:
            deltabase = chainbase
        chainlen, compresseddeltalen = revlog._chaininfo(base)
        chainlen += 1
        compresseddeltalen += deltalen
        return _deltainfo(dist, deltalen, (header, data), deltabase,
                          chainbase, chainlen, compresseddeltalen)

    def finddeltainfo(self, revinfo, fh):
        """Find an acceptable delta against a candidate revision

        revinfo: information about the revision (instance of _revisioninfo)
        fh:      file handle to either the .i or the .d revlog file,
                 depending on whether it is inlined or not

        Returns the first acceptable candidate revision, as ordered by
        _getcandidaterevs
        """
        cachedelta = revinfo.cachedelta
        p1 = revinfo.p1
        p2 = revinfo.p2
        revlog = self.revlog

        deltainfo = None
        for candidaterevs in self._getcandidaterevs(p1, p2, cachedelta):
            nominateddeltas = []
            for candidaterev in candidaterevs:
                candidatedelta = self._builddeltainfo(revinfo, candidaterev, fh)
                if revlog._isgooddeltainfo(candidatedelta, revinfo.textlen):
                    nominateddeltas.append(candidatedelta)
            if nominateddeltas:
                deltainfo = min(nominateddeltas, key=lambda x: x.deltalen)
                break

        return deltainfo

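# Illustrative sketch (added commentary, not part of the original module):
# finddeltainfo's search is "first acceptable group wins".  Stripped of the
# revlog details, the same selection logic reads as below, where 'groups',
# 'build' and 'isgood' are hypothetical stand-ins for _getcandidaterevs,
# _builddeltainfo and _isgooddeltainfo:
#
#     def _pickdelta(groups, build, isgood):
#         for group in groups:
#             nominated = [d for d in map(build, group) if isgood(d)]
#             if nominated:
#                 return min(nominated, key=lambda d: d.deltalen)
#         return None
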
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node:       expected hash of the revision
    p1, p2:     parent revs of the revision
    btext:      built text cache consisting of a one-element list
    textlen:    length of the revision fulltext
    cachedelta: (baserev, uncompressed_delta) or None
    flags:      flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

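# Illustrative sketch (added commentary, not part of the original module):
# one v0 record is 76 bytes (four 4-byte big-endian ints plus three 20-byte
# nodeids), so a dummy entry round-trips as:
#
#     rec = indexformatv0_pack(0, 11, 0, 0, b'\0' * 20, b'\0' * 20,
#                              b'n' * 20)
#     assert len(rec) == indexformatv0.size == 76
#     offset, clen, baserev, linkrev, p1, p2, node = indexformatv0_unpack(rec)
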
class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        # add the magic null revision at -1
        index.append((0, 0, 0, -1, -1, -1, -1, nullid))

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_('index entry flags need revlog version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

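# Illustrative sketch (added commentary, not part of the original module):
# the first ng field packs the 6-byte offset and 2-byte flags into a single
# 64-bit integer (see offset_type/getoffset/gettype), e.g.:
#
#     first = (0x1234 << 16) | 0x0001      # offset 0x1234, flags 0x0001
#     rec = indexformatng_pack(first, 11, 42, 0, 0, -1, -1, b'\0' * 20)
#     assert indexformatng.size == 64      # 8 + 6 * 4 + 20 + 12 pad bytes
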
# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

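# Note (added commentary): in packentry above, the 4-byte version header
# overwrites the start of the packed offset field for rev 0.  This is safe
# because the first entry always starts at offset 0, so readers can mask
# the header back out when parsing the entry.
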
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        # 3-tuple of (node, rev, text) for a raw revision.
        self._cache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, '')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._aggressivemergedeltas = False
        self.index = []
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._nodecache = {nullid: nullrev}
        self._nodepos = None
        self._compengine = 'zlib'
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._srdensitythreshold = 0.25
        self._srmingapsize = 262144

        mmapindexthreshold = None
        v = REVLOG_DEFAULT_VERSION
        opts = getattr(opener, 'options', None)
        if opts is not None:
            if 'revlogv2' in opts:
                # version 2 revlogs always use generaldelta.
                v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
            elif 'revlogv1' in opts:
                if 'generaldelta' in opts:
                    v |= FLAG_GENERALDELTA
            else:
                v = 0
            if 'chunkcachesize' in opts:
                self._chunkcachesize = opts['chunkcachesize']
            if 'maxchainlen' in opts:
                self._maxchainlen = opts['maxchainlen']
            if 'aggressivemergedeltas' in opts:
                self._aggressivemergedeltas = opts['aggressivemergedeltas']
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
            if 'compengine' in opts:
                self._compengine = opts['compengine']
            if 'maxdeltachainspan' in opts:
                self._maxdeltachainspan = opts['maxdeltachainspan']
            if mmaplargeindex and 'mmapindexthreshold' in opts:
                mmapindexthreshold = opts['mmapindexthreshold']
            self._withsparseread = bool(opts.get('with-sparse-read', False))
            if 'sparse-read-density-threshold' in opts:
                self._srdensitythreshold = opts['sparse-read-density-threshold']
            if 'sparse-read-min-gap-size' in opts:
                self._srmingapsize = opts['sparse-read-min-gap-size']

        if self._chunkcachesize <= 0:
            raise RevlogError(_('revlog chunk cache size %r is not greater '
                                'than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise RevlogError(_('revlog chunk cache size %r is not a power '
                                'of 2') % self._chunkcachesize)

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                        self.opener.fstat(f).st_size >= mmapindexthreshold):
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                v = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & FLAG_INLINE_DATA
        self._generaldelta = v & FLAG_GENERALDELTA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0:
            if flags:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise RevlogError(_('unknown flags (%#04x) in version %d '
                                    'revlog %s') %
                                  (flags >> 16, fmt, self.indexfile))
        else:
            raise RevlogError(_('unknown version (%d) in revlog %s') %
                              (fmt, self.indexfile))

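        # Illustrative sketch (added commentary, not part of the original
        # module): the header int splits into a format number (low 16 bits)
        # and feature flags (high bits), e.g. for a v1 inline revlog:
        #
        #     v = REVLOGV1 | FLAG_INLINE_DATA
        #     assert v & 0xFFFF == REVLOGV1            # fmt
        #     assert v & ~0xFFFF == FLAG_INLINE_DATA   # flags
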
        self.storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise RevlogError(_("index %s is corrupted") % (self.indexfile))
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        return util.compengines[self._compengine].revlogcompressor()

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        if existingfp is not None:
            yield existingfp
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 2)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        return iter(xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        step = 1
        if stop is not None:
            if start > stop:
                step = -1
            stop += step
        else:
            stop = len(self)
        return xrange(start, stop, step)

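    # Note (added commentary): the parsed index keeps a sentinel entry for
    # the null revision at its end (revlogoldio.parseindex appends it
    # explicitly), which is why __len__ subtracts one and tip() reads the
    # entry at len(self.index) - 2.
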
    @util.propertycache
    def nodemap(self):
        self.rev(self.node(0))
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def clearcaches(self):
        self._cache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 2
            for r in xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid:
                raise error.WdirUnsupported
            raise LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

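    # Illustrative sketch (added commentary, not part of the original
    # module): start() and flags() undo the offset/flags packing used by the
    # ng index format, e.g. an entry whose first field equals
    # (0x1234 << 16) | 0x0001 yields start() == 0x1234 and flags() == 0x0001.
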
    def length(self, rev):
        return self.index[rev][1]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        base = index[rev][3]
        while base != rev:
            rev = base
            base = index[rev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

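    # Note (added commentary): the while/else in _chaininfo above relies on
    # Python's loop-else semantics: the else branch runs only when the walk
    # reaches the chain base without break, i.e. without hitting a cached
    # entry, which is exactly when the base's own compressed length still
    # needs to be counted.
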
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

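    # Illustrative sketch (added commentary, not part of the original
    # module): with generaldelta, index entry e[3] names each revision's
    # delta base, so a hypothetical chain 5 -> 3 -> 0 (where
    # index[0][3] == 0 marks the base) makes _deltachain(5) return
    # ([0, 3, 5], False).
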
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse topological order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        return ancestor.lazyancestors(self.parentrevs, revs, stoprev=stoprev,
                                      inclusive=inclusive)

    def descendants(self, revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

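    # Illustrative sketch (added commentary, not part of the original
    # module): the incremental object can be queried for more than one head
    # set against the same 'common'; 'rlog', 'headsA' and 'headsB' below are
    # hypothetical:
    #
    #     inc = rlog.incrementalmissingrevs(common=[0])
    #     onlyA = inc.missingancestors(headsA)
    #     onlyB = inc.missingancestors(headsB)
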
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will already be marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

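    # Illustrative sketch (added commentary, not part of the original
    # module): in a hypothetical linear history 0 -> 1 -> 2,
    # nodesbetween(roots=[node(1)], heads=[node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]), since every node counts
    # as its own descendant and ancestor.
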
1246 def headrevs(self):
1246 def headrevs(self):
1247 try:
1247 try:
1248 return self.index.headrevs()
1248 return self.index.headrevs()
1249 except AttributeError:
1249 except AttributeError:
1250 return self._headrevs()
1250 return self._headrevs()
1251
1251
1252 def computephases(self, roots):
1252 def computephases(self, roots):
1253 return self.index.computephasesmapsets(roots)
1253 return self.index.computephasesmapsets(roots)
1254
1254
    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at the start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

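    # A small trace of the marking pass above (schematic, not module code):
    # with history 0 <- 1 <- 2 and a fork 1 <- 3, every rev is first assumed
    # to be a head; visiting rev 1 clears rev 0, and visiting revs 2 and 3
    # both clear rev 1, leaving [2, 3]. The extra slot at index ``count``
    # exists so that null parents (nullrev, i.e. -1) index the last element
    # and are cleared harmlessly.
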
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                        heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]

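    # A small trace (schematic): with revisions 0 <- 1 <- 2 and 1 <- 3,
    # heads(start=node(0)) seeds reachable = heads = {0}; visiting rev 1
    # adds itself and evicts 0, rev 2 adds itself and evicts 1, and rev 3
    # adds itself, leaving the nodes of revs 2 and 3. Revs listed in
    # ``stop`` are never added to ``reachable``, which is how their
    # descendants get cut off.
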
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        if start == nullrev:
            return True
        for i in self.descendants([start]):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.commonancestorsheads(a, b)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancs)

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        The implementation of this is trivial but the use of
        commonancestorsheads is not."""
        return a in self.commonancestorsheads(a, b)

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

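    # Tie-break sketch: if revs 4 and 5 were both "best" common ancestors,
    # ``min(map(self.node, ancs))`` compares their binary nodeids and always
    # returns the lexicographically smaller one, so the chosen merge base is
    # deterministic across runs and platforms.
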
    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise LookupError(id, self.indexfile,
                                  _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except (TypeError, binascii.Error):
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def shortest(self, hexnode, minlength=1):
        """Find the shortest unambiguous prefix that matches hexnode."""
        def isvalid(test):
            try:
                if self._partialmatch(test) is None:
                    return False

                try:
                    i = int(test)
                    # if we are a pure int, then starting with zero will not be
                    # confused as a rev; or, obviously, if the int is larger
                    # than the value of the tip rev
                    if test[0] == '0' or i > len(self):
                        return True
                    return False
                except ValueError:
                    return True
            except error.RevlogError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True

        shortest = hexnode
        startlength = max(6, minlength)
        length = startlength
        while True:
            test = hexnode[:length]
            if isvalid(test):
                shortest = test
                if length == minlength or length > startlength:
                    return shortest
                length -= 1
            else:
                length += 1
                if len(shortest) <= length:
                    return shortest

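    # A rough walk-through (illustrative values): for a 40-hex nodeid whose
    # 6-character prefix is already unique, isvalid('abcdef') succeeds, the
    # loop then probes 5, 4, ... characters until a prefix becomes ambiguous
    # or minlength is reached, and the last valid prefix wins. If the
    # starting prefix was ambiguous instead, the loop grows the prefix one
    # character at a time until it becomes unambiguous.
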
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

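    # For instance (made-up numbers): with a cached segment at offset 0 of
    # length 4096, caching 1024 more bytes at offset 4096 simply extends the
    # cached string to 5120 bytes; a write at any other offset, or one that
    # would grow the cache past _chunksize, replaces the cache instead.
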
    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)
        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            return util.buffer(d, offset - realoffset, length)
        return d

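    # The rounding above aligns reads to multiples of _chunkcachesize. A
    # minimal sketch of the arithmetic, assuming a 64 KiB window (values
    # here are illustrative only):
    #
    # >>> cachesize = 65536
    # >>> offset, length = 70000, 100
    # >>> realoffset = offset & ~(cachesize - 1)
    # >>> realoffset
    # 65536
    # >>> (((offset + length + cachesize) & ~(cachesize - 1)) - realoffset)
    # 65536
    #
    # so a 100-byte request at offset 70000 becomes one aligned 64 KiB read
    # covering [65536, 131072), which then seeds the chunk cache.
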
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        length = end - start

        return start, self._getsegment(start, length, df=df)

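    # In an inline revlog, index entries and data chunks are interleaved in
    # a single file, so the data offsets above must skip one index entry per
    # revision. A made-up example, assuming the 64-byte v1 entry size
    # (``self._io.size``): a chunk for revision 3 that nominally starts at
    # data offset 500 actually lives at 500 + (3 + 1) * 64 = 756 in the
    # combined file.
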
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = _slicechunk(self, revs)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

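    # Example (schematic): in a generaldelta revlog the stored base field is
    # the delta parent itself, so deltaparent(7) may be any earlier rev; in a
    # pre-generaldelta revlog deltas always chain against the previous rev,
    # so deltaparent(7) is 6 unless rev 7 is a full snapshot (base == rev),
    # in which case nullrev is returned.
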
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._cache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._cache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._cache[2]

            cachedrev = self._cache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
            if stopped:
                rawtext = self._cache[2]

            # drop cache to save memory
            self._cache = None

            bins = self._chunks(chain, df=_df)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._cache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

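    # Reconstruction sketch (schematic, not module code): if rev 5 is stored
    # as full text and revs 6 and 7 as deltas, then revision(7) fetches the
    # chain [5, 6, 7] via _deltachain, reads all three chunks in one pass
    # with _chunks, and folds them together roughly as
    #
    #   rawtext = mdiff.patches(chunk(5), [chunk(6), chunk(7)])
    #
    # before flag processors and the integrity check run on the result.
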
    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return hash(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise ProgrammingError(_("invalid '%s' operation") % (operation))
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_("incompatible revision flag '%#x'") %
                              (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in _flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise RevlogError(message)

                processor = _flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

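    # Ordering sketch (hypothetical flags A and B registered in
    # REVIDX_FLAGS_ORDER as [A, B]): a revision carrying both flags is
    # transformed A-then-B on 'read' and B-then-A on 'write', so a write
    # followed by a read round-trips even when the two transforms do not
    # commute.
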
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != self.hash(text, p1, p2):
            revornode = rev
            if revornode is None:
                revornode = templatefilters.short(hex(node))
            raise RevlogError(_("integrity check failed on %s:%s")
                              % (self.indexfile, pycompat.bytestr(revornode)))

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(-2)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        with self._datafp('w') as df:
            for r in self:
                df.write(self._getsegmentforrevs(r, r)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional _deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise RevlogError(_("attempted to add linkrev -1 to %s")
                              % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise RevlogError(_('revlog decompress error: %s') % str(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor()
                self._decompressors[t] = compressor
            except KeyError:
                raise RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

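    # Header convention sketch (schematic, using the header bytes handled
    # above; the exact tuples depend on whether compression pays off):
    #
    #   compress('')       -> ('', '')        empty stays empty
    #   compress(text)     -> ('', 'x...')    when zlib wins; zlib streams
    #                                         start with 0x78, i.e. 'x'
    #   compress('\0abc')  -> ('', '\0abc')   NUL-leading data passes raw
    #   compress('abc')    -> ('u', 'abc')    'u' marks stored-uncompressed
    #
    # decompress() reverses the dispatch on that first byte: 'x' -> zlib,
    # '\0' -> raw, 'u' -> strip the marker, anything else -> look up a
    # registered compression engine by its revlog header.
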
    def _isgooddeltainfo(self, d, textlen):
        """Returns True if the given delta is good. Good means that it is within
        the disk span, disk size, and chain length bounds that we know to be
        performant."""
        if d is None:
            return False

        # - 'd.distance' is the distance from the base revision -- bounding it
        #   limits the amount of I/O we need to do.
        # - 'd.compresseddeltalen' is the sum of the total size of deltas we
        #   need to apply -- bounding it limits the amount of CPU we consume.

        defaultmax = textlen * 4
        maxdist = self._maxdeltachainspan
        if not maxdist:
            maxdist = d.distance # ensure the conditional passes
        maxdist = max(maxdist, defaultmax)
        if (d.distance > maxdist or d.deltalen > textlen or
            d.compresseddeltalen > textlen * 2 or
            (self._maxchainlen and d.chainlen > self._maxchainlen)):
            return False

        return True

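    # Numeric sketch (made-up values): for textlen = 1000, a candidate delta
    # is rejected if its chain spans more than max(_maxdeltachainspan, 4000)
    # bytes on disk, if the delta itself exceeds 1000 bytes (a full text
    # would be cheaper), or if the sum of deltas to apply exceeds 2000
    # bytes; a configured _maxchainlen additionally caps the chain length.
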
    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise RevlogError(_("%s: attempt to add null revision") %
                              (self.indexfile))
        if node == wdirid:
            raise RevlogError(_("%s: attempt to add wdir revision") %
                              (self.indexfile))

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = _deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        if deltainfo is not None:
            base = deltainfo.base
            chainbase = deltainfo.chainbase
            data = deltainfo.data
            l = deltainfo.deltalen
        else:
            rawtext = deltacomputer.buildtext(revinfo, fh)
            data = self.compress(rawtext)
            l = len(data[1]) + len(data[0])
            base = chainbase = curr

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        self._writeentry(transaction, ifh, dfh, entry, data, link, offset)

        if alwayscache and rawtext is None:
            rawtext = deltacomputer._buildtext(revinfo, fh)

        if type(rawtext) == bytes: # only accept immutable objects
            self._cache = (node, curr, rawtext)
        self._chainbasecache[curr] = chainbase
        return node

    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

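        # Inline revlogs store each revision's data right after its index
        # entry in the .i file; non-inline revlogs keep index entries in .i
        # and revision data in a separate .d file.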
        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
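        # flush pending writes so that data appended so far can be read back
        # consistently (used by _peek_iscensored implementations below)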
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()
        try:
            deltacomputer = _deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise LookupError(deltabase, self.indexfile,
                                      _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return nodes

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        return False

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        return False

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        brokenrevs = set()
        strippoint = len(self)

        heads = {}
        futurelargelinkrevs = set()
        for head in self.headrevs():
            headlinkrev = self.linkrev(head)
            heads[head] = headlinkrev
            if headlinkrev >= minlink:
                futurelargelinkrevs.add(headlinkrev)

        # This algorithm involves walking down the rev graph, starting at the
        # heads. Since the revs are topologically sorted according to linkrev,
        # once all head linkrevs are below the minlink, we know there are
        # no more revs that could have a linkrev greater than minlink.
        # So we can stop walking.
        while futurelargelinkrevs:
            strippoint -= 1
            linkrev = heads.pop(strippoint)

            if linkrev < minlink:
                brokenrevs.add(strippoint)
            else:
                futurelargelinkrevs.remove(linkrev)

            for p in self.parentrevs(strippoint):
                if p != nullrev:
                    plinkrev = self.linkrev(p)
                    heads[p] = plinkrev
                    if plinkrev >= minlink:
                        futurelargelinkrevs.add(plinkrev)

        return strippoint, brokenrevs

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
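        # Compute the truncation offsets: a non-inline revlog truncates the
        # .d file at the first stripped revision's data offset and the .i
        # file at rev * entrysize; an inline revlog holds both in .i, so the
        # two offsets are added together.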
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

    def checksize(self):
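        # Returns (dd, di): how far the data file and index file sizes
        # deviate from what the index entries predict. Both are 0 for a
        # healthy revlog; non-zero values indicate a truncated or otherwise
        # inconsistent file.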
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, aggressivemergedeltas=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``aggressivemergedeltas`` argument
        controls whether to compute deltas against both parents for merges.
        If left unset, the destination revlog's existing setting is kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydeltabase controls whether to reuse a cached delta, if possible.
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._aggressivemergedeltas

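        # The destination's delta-reuse knobs are overridden for the duration
        # of the copy and restored in the finally block below.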
        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False

            destrevlog._aggressivemergedeltas = aggressivemergedeltas or oldamd

            populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
                                                self.DELTAREUSESAMEREVS)

            deltacomputer = _deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if populatecachedelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, str(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._aggressivemergedeltas = oldamd