revlog: speed up isancestor...
Valentin Gatien-Baron
r42639:055c3e2c default
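The commit message above is truncated, but the two hunks below show the shape of the change: the changelog-level reachableroots override is dropped, and dagop.reachableroots now delegates directly to repo.changelog.reachableroots, moving the fast path into the revlog layer where isancestor-style queries can use it. As a hedged illustration of why reachableroots answers ancestry questions, here is a minimal sketch (not code from this commit; `cl` stands for any object exposing the reachableroots(minroot, heads, roots, includepath) signature used in these hunks):

def isancestorrev(cl, a, b):
    # Sketch only: revision numbers grow child-ward, so an ancestor's
    # rev is never larger than its descendant's.
    if a == b:
        return True
    if a > b:
        return False
    # 'a' is an ancestor of 'b' exactly when walking parents from head
    # 'b' reaches root 'a'; reachableroots returns the reached roots.
    return bool(cl.reachableroots(a, [b], [a], includepath=False))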
--- a/mercurial/changelog.py
+++ b/mercurial/changelog.py
@@ -1,668 +1,665 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import (
    attr,
)

from . import (
    encoding,
    error,
    pycompat,
    revlog,
    util,
)
from .utils import (
    dateutil,
    stringutil,
)

_defaultextra = {'branch': 'default'}

def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = text.replace('\\', '\\\\').replace('\n', '\\n').replace('\r', '\\r')
    return text.replace('\0', '\\0')

def _string_unescape(text):
    if '\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace('\\\\', '\\\\\n')
        text = text.replace('\\0', '\0')
        text = text.replace('\n', '')
    return stringutil.unescapestr(text)

def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split('\0'):
        if l:
            k, v = _string_unescape(l).split(':', 1)
            extra[k] = v
    return extra

def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [
        _string_escape('%s:%s' % (k, pycompat.bytestr(d[k])))
        for k in sorted(d)
    ]
    return "\0".join(items)

def encodecopies(files, copies):
    items = []
    for i, dst in enumerate(files):
        if dst in copies:
            items.append('%d\0%s' % (i, copies[dst]))
    if len(items) != len(copies):
        raise error.ProgrammingError('some copy targets missing from file list')
    return "\n".join(items)

def decodecopies(files, data):
    try:
        copies = {}
        for l in data.split('\n'):
            strindex, src = l.split('\0')
            i = int(strindex)
            dst = files[i]
            copies[dst] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None

def encodefileindices(files, subset):
    subset = set(subset)
    indices = []
    for i, f in enumerate(files):
        if f in subset:
            indices.append('%d' % i)
    return '\n'.join(indices)

def decodefileindices(files, data):
    try:
        subset = []
        for strindex in data.split('\n'):
            i = int(strindex)
            if i < 0 or i >= len(files):
                return None
            subset.append(files[i])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None

def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return '\n'.join([l.rstrip() for l in desc.splitlines()]).strip('\n')

class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''
    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end
    def tell(self):
        return self.offset
    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = ""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, "".join(self.data))
            del self.data[1:]
            s = self.data[0][doff:doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)

def _divertopener(opener, target):
    """build an opener that writes in 'target.a' instead of 'target'"""
    def _divert(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return opener(name + ".a", mode)
    return _divert

def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""
    def _delay(name, mode='r', checkambig=False):
        if name != target:
            return opener(name, mode)
        return appender(opener, name, mode, buf)
    return _delay

@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default='')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default='')

class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        r'_offsets',
        r'_text',
    )

    def __new__(cls, text):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index('\n')
        nl2 = text.index('\n', nl1 + 1)
        nl3 = text.index('\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1:nl3 + 2] == '\n':
            doublenl = nl3
        else:
            doublenl = text.index('\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text

        return self

    @property
    def manifest(self):
        return bin(self._text[0:self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1:off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        return dateextra.split(' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1:off[2]]
        fields = dateextra.split(' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def files(self):
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1:off[3]].split('\n')

    @property
    def filesadded(self):
        rawindices = self.extra.get('filesadded')
        return rawindices and decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        rawindices = self.extra.get('filesremoved')
        return rawindices and decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        rawcopies = self.extra.get('p1copies')
        return rawcopies and decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        rawcopies = self.extra.get('p2copies')
        return rawcopies and decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2:])

class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists('00changelog.i.a'):
            indexfile = '00changelog.i.a'
        else:
            indexfile = '00changelog.i'

        datafile = '00changelog.d'
        revlog.revlog.__init__(self, opener, indexfile, datafile=datafile,
                               checkambig=True, mmaplargeindex=True)

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self.filteredrevs = frozenset()

    def tiprev(self):
        for i in pycompat.xrange(len(self) - 1, -2, -1):
            if i not in self.filteredrevs:
                return i

    def tip(self):
        """filtered version of revlog.tip"""
        return self.node(self.tiprev())

    def __contains__(self, rev):
        """filtered version of revlog.__contains__"""
        return (0 <= rev < len(self)
                and rev not in self.filteredrevs)

    def __iter__(self):
        """filtered version of revlog.__iter__"""
        if len(self.filteredrevs) == 0:
            return revlog.revlog.__iter__(self)

        def filterediter():
            for i in pycompat.xrange(len(self)):
                if i not in self.filteredrevs:
                    yield i

        return filterediter()

    def revs(self, start=0, stop=None):
        """filtered version of revlog.revs"""
        for i in super(changelog, self).revs(start, stop):
            if i not in self.filteredrevs:
                yield i

-    def reachableroots(self, minroot, heads, roots, includepath=False):
-        return self.index.reachableroots2(minroot, heads, roots, includepath)
-
    def _checknofilteredinrevs(self, revs):
        """raise the appropriate error if 'revs' contains a filtered revision

        This returns a version of 'revs' to be used thereafter by the caller.
        In particular, if revs is an iterator, it is converted into a set.
        """
        safehasattr = util.safehasattr
        if safehasattr(revs, '__next__'):
            # Note that inspect.isgenerator() is not true for iterators.
            revs = set(revs)

        filteredrevs = self.filteredrevs
        if safehasattr(revs, 'first'):  # smartset
            offenders = revs & filteredrevs
        else:
            offenders = filteredrevs.intersection(revs)

        for rev in offenders:
            raise error.FilteredIndexError(rev)
        return revs

    def headrevs(self, revs=None):
        if revs is None and self.filteredrevs:
            try:
                return self.index.headrevsfiltered(self.filteredrevs)
            # AttributeError covers non-c-extension environments and
            # old c extensions without filter handling.
            except AttributeError:
                return self._headrevs()

        if self.filteredrevs:
            revs = self._checknofilteredinrevs(revs)
        return super(changelog, self).headrevs(revs)

    def strip(self, *args, **kwargs):
        # XXX make something better than assert
        # We can't expect proper strip behavior if we are filtered.
        assert not self.filteredrevs
        super(changelog, self).strip(*args, **kwargs)

    def rev(self, node):
        """filtered version of revlog.rev"""
        r = super(changelog, self).rev(node)
        if r in self.filteredrevs:
            raise error.FilteredLookupError(hex(node), self.indexfile,
                                            _('filtered node'))
        return r

    def node(self, rev):
        """filtered version of revlog.node"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).node(rev)

    def linkrev(self, rev):
        """filtered version of revlog.linkrev"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).linkrev(rev)

    def parentrevs(self, rev):
        """filtered version of revlog.parentrevs"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).parentrevs(rev)

    def flags(self, rev):
        """filtered version of revlog.flags"""
        if rev in self.filteredrevs:
            raise error.FilteredIndexError(rev)
        return super(changelog, self).flags(rev)

    def delayupdate(self, tr):
        "delay visibility of index updates to other readers"

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + '.a'):
                    self._realopener.unlink(self.indexfile + '.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(self._realopener, self.indexfile,
                                           self._delaybuf)
        self._delayed = True
        tr.addpending('cl-%i' % id(self), self._writepending)
        tr.addfinalize('cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        "finalize index updates"
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + ".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, 'a', checkambig=True)
            fp.write("".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        "create a file containing the unfinalized state for pretxnchangegroup"
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + ".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, "w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write("".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

        - manifest node in binary
        - author/user as a localstr
        - date as a 2-tuple of (time, timezone)
        - list of files
        - commit message as a localstr
        - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        c = changelogrevision(self.revision(node))
        return (
            c.manifest,
            c.user,
            c.date,
            c.files,
            c.description,
            c.extra
        )

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        return changelogrevision(self.revision(nodeorrev))

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index("\n\n")
        l = text[:last].split('\n')
        return l[3:]

    def add(self, manifest, files, desc, transaction, p1, p2,
            user, date=None, extra=None, p1copies=None, p2copies=None,
            filesadded=None, filesremoved=None):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_("empty username"))
        if "\n" in user:
            raise error.StorageError(_("username %r contains a newline")
                                     % pycompat.bytestr(user))

        desc = stripdesc(desc)

        if date:
            parseddate = "%d %d" % dateutil.parsedate(date)
        else:
            parseddate = "%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get("branch")
            if branch in ("default", ""):
                del extra["branch"]
            elif branch in (".", "null", "tip"):
                raise error.StorageError(_('the name \'%s\' is reserved')
                                         % branch)
        extrasentries = p1copies, p2copies, filesadded, filesremoved
        if extra is None and any(x is not None for x in extrasentries):
            extra = {}
        sortedfiles = sorted(files)
        if p1copies is not None:
            extra['p1copies'] = encodecopies(sortedfiles, p1copies)
        if p2copies is not None:
            extra['p2copies'] = encodecopies(sortedfiles, p2copies)
        if filesadded is not None:
            extra['filesadded'] = encodefileindices(sortedfiles, filesadded)
        if filesremoved is not None:
            extra['filesremoved'] = encodefileindices(sortedfiles, filesremoved)

        if extra:
            extra = encodeextra(extra)
            parseddate = "%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + ["", desc]
        text = "\n".join(l)
        return self.addrevision(text, transaction, len(self), p1, p2)

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get("branch")), 'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source
        # bundle
        duplicates = transaction.changes.setdefault('revduplicates', [])
        duplicates.append(self.rev(node))
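For reference, the revision text layout documented in changelogrevision.__new__ (and produced by changelog.add) is easy to exercise in plain Python. The following is an illustrative sketch only: the manifest node and file names are invented, and a real entry escapes the extra field with encodeextra:

# node\n  user\n  "time tz extra"\n  files (one per line)\n \n description
sample = (b"0123456789abcdef0123456789abcdef01234567\n"
          b"Alice <alice@example.com>\n"
          b"1560000000 0 branch:stable\n"
          b"file_a.txt\nfile_b.txt"
          b"\n\n"
          b"commit message body")

def parsefields(text):
    # mirrors readfiles()/changelogrevision: the header ends at the
    # first blank line, and the description follows it
    last = text.index(b"\n\n")
    header = text[:last].split(b'\n')
    return header[0], header[1], header[2], header[3:], text[last + 2:]

node, user, dateextra, files, desc = parsefields(sample)
assert files == [b'file_a.txt', b'file_b.txt']
assert desc == b"commit message body"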
--- a/mercurial/dagop.py
+++ b/mercurial/dagop.py
@@ -1,844 +1,838 @@
1 # dagop.py - graph ancestry and topology algorithm for revset
1 # dagop.py - graph ancestry and topology algorithm for revset
2 #
2 #
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
3 # Copyright 2010 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from .node import (
12 from .node import (
13 nullrev,
13 nullrev,
14 )
14 )
15 from .thirdparty import (
15 from .thirdparty import (
16 attr,
16 attr,
17 )
17 )
18 from . import (
18 from . import (
19 error,
19 error,
20 mdiff,
20 mdiff,
21 node,
21 node,
22 patch,
22 patch,
23 pycompat,
23 pycompat,
24 smartset,
24 smartset,
25 )
25 )
26
26
27 baseset = smartset.baseset
27 baseset = smartset.baseset
28 generatorset = smartset.generatorset
28 generatorset = smartset.generatorset
29
29
30 # possible maximum depth between null and wdir()
30 # possible maximum depth between null and wdir()
31 maxlogdepth = 0x80000000
31 maxlogdepth = 0x80000000
32
32
33 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
33 def _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse):
34 """Walk DAG using 'pfunc' from the given 'revs' nodes
34 """Walk DAG using 'pfunc' from the given 'revs' nodes
35
35
36 'pfunc(rev)' should return the parent/child revisions of the given 'rev'
36 'pfunc(rev)' should return the parent/child revisions of the given 'rev'
37 if 'reverse' is True/False respectively.
37 if 'reverse' is True/False respectively.
38
38
39 Scan ends at the stopdepth (exlusive) if specified. Revisions found
39 Scan ends at the stopdepth (exlusive) if specified. Revisions found
40 earlier than the startdepth are omitted.
40 earlier than the startdepth are omitted.
41 """
41 """
42 if startdepth is None:
42 if startdepth is None:
43 startdepth = 0
43 startdepth = 0
44 if stopdepth is None:
44 if stopdepth is None:
45 stopdepth = maxlogdepth
45 stopdepth = maxlogdepth
46 if stopdepth == 0:
46 if stopdepth == 0:
47 return
47 return
48 if stopdepth < 0:
48 if stopdepth < 0:
49 raise error.ProgrammingError('negative stopdepth')
49 raise error.ProgrammingError('negative stopdepth')
50 if reverse:
50 if reverse:
51 heapsign = -1 # max heap
51 heapsign = -1 # max heap
52 else:
52 else:
53 heapsign = +1 # min heap
53 heapsign = +1 # min heap
54
54
55 # load input revs lazily to heap so earlier revisions can be yielded
55 # load input revs lazily to heap so earlier revisions can be yielded
56 # without fully computing the input revs
56 # without fully computing the input revs
57 revs.sort(reverse)
57 revs.sort(reverse)
58 irevs = iter(revs)
58 irevs = iter(revs)
59 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
59 pendingheap = [] # [(heapsign * rev, depth), ...] (i.e. lower depth first)
60
60
61 inputrev = next(irevs, None)
61 inputrev = next(irevs, None)
62 if inputrev is not None:
62 if inputrev is not None:
63 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
63 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
64
64
65 lastrev = None
65 lastrev = None
66 while pendingheap:
66 while pendingheap:
67 currev, curdepth = heapq.heappop(pendingheap)
67 currev, curdepth = heapq.heappop(pendingheap)
68 currev = heapsign * currev
68 currev = heapsign * currev
69 if currev == inputrev:
69 if currev == inputrev:
70 inputrev = next(irevs, None)
70 inputrev = next(irevs, None)
71 if inputrev is not None:
71 if inputrev is not None:
72 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
72 heapq.heappush(pendingheap, (heapsign * inputrev, 0))
73 # rescan parents until curdepth >= startdepth because queued entries
73 # rescan parents until curdepth >= startdepth because queued entries
74 # of the same revision are iterated from the lowest depth
74 # of the same revision are iterated from the lowest depth
75 foundnew = (currev != lastrev)
75 foundnew = (currev != lastrev)
76 if foundnew and curdepth >= startdepth:
76 if foundnew and curdepth >= startdepth:
77 lastrev = currev
77 lastrev = currev
78 yield currev
78 yield currev
79 pdepth = curdepth + 1
79 pdepth = curdepth + 1
80 if foundnew and pdepth < stopdepth:
80 if foundnew and pdepth < stopdepth:
81 for prev in pfunc(currev):
81 for prev in pfunc(currev):
82 if prev != node.nullrev:
82 if prev != node.nullrev:
83 heapq.heappush(pendingheap, (heapsign * prev, pdepth))
83 heapq.heappush(pendingheap, (heapsign * prev, pdepth))
84
84
85 def filectxancestors(fctxs, followfirst=False):
85 def filectxancestors(fctxs, followfirst=False):
86 """Like filectx.ancestors(), but can walk from multiple files/revisions,
86 """Like filectx.ancestors(), but can walk from multiple files/revisions,
87 and includes the given fctxs themselves
87 and includes the given fctxs themselves
88
88
89 Yields (rev, {fctx, ...}) pairs in descending order.
89 Yields (rev, {fctx, ...}) pairs in descending order.
90 """
90 """
91 visit = {}
91 visit = {}
92 visitheap = []
92 visitheap = []
93 def addvisit(fctx):
93 def addvisit(fctx):
94 rev = fctx.rev()
94 rev = fctx.rev()
95 if rev not in visit:
95 if rev not in visit:
96 visit[rev] = set()
96 visit[rev] = set()
97 heapq.heappush(visitheap, -rev) # max heap
97 heapq.heappush(visitheap, -rev) # max heap
98 visit[rev].add(fctx)
98 visit[rev].add(fctx)
99
99
100 if followfirst:
100 if followfirst:
101 cut = 1
101 cut = 1
102 else:
102 else:
103 cut = None
103 cut = None
104
104
105 for c in fctxs:
105 for c in fctxs:
106 addvisit(c)
106 addvisit(c)
107 while visit:
107 while visit:
108 currev = -heapq.heappop(visitheap)
108 currev = -heapq.heappop(visitheap)
109 curfctxs = visit.pop(currev)
109 curfctxs = visit.pop(currev)
110 yield currev, curfctxs
110 yield currev, curfctxs
111 for c in curfctxs:
111 for c in curfctxs:
112 for parent in c.parents()[:cut]:
112 for parent in c.parents()[:cut]:
113 addvisit(parent)
113 addvisit(parent)
114 assert not visitheap
114 assert not visitheap
115
115
116 def filerevancestors(fctxs, followfirst=False):
116 def filerevancestors(fctxs, followfirst=False):
117 """Like filectx.ancestors(), but can walk from multiple files/revisions,
117 """Like filectx.ancestors(), but can walk from multiple files/revisions,
118 and includes the given fctxs themselves
118 and includes the given fctxs themselves
119
119
120 Returns a smartset.
120 Returns a smartset.
121 """
121 """
122 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
122 gen = (rev for rev, _cs in filectxancestors(fctxs, followfirst))
123 return generatorset(gen, iterasc=False)
123 return generatorset(gen, iterasc=False)
124
124
125 def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
125 def _genrevancestors(repo, revs, followfirst, startdepth, stopdepth, cutfunc):
126 if followfirst:
126 if followfirst:
127 cut = 1
127 cut = 1
128 else:
128 else:
129 cut = None
129 cut = None
130 cl = repo.changelog
130 cl = repo.changelog
131 def plainpfunc(rev):
131 def plainpfunc(rev):
132 try:
132 try:
133 return cl.parentrevs(rev)[:cut]
133 return cl.parentrevs(rev)[:cut]
134 except error.WdirUnsupported:
134 except error.WdirUnsupported:
135 return (pctx.rev() for pctx in repo[rev].parents()[:cut])
135 return (pctx.rev() for pctx in repo[rev].parents()[:cut])
136 if cutfunc is None:
136 if cutfunc is None:
137 pfunc = plainpfunc
137 pfunc = plainpfunc
138 else:
138 else:
139 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
139 pfunc = lambda rev: [r for r in plainpfunc(rev) if not cutfunc(r)]
140 revs = revs.filter(lambda rev: not cutfunc(rev))
140 revs = revs.filter(lambda rev: not cutfunc(rev))
141 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
141 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=True)
142
142
143 def revancestors(repo, revs, followfirst=False, startdepth=None,
143 def revancestors(repo, revs, followfirst=False, startdepth=None,
144 stopdepth=None, cutfunc=None):
144 stopdepth=None, cutfunc=None):
145 r"""Like revlog.ancestors(), but supports additional options, includes
145 r"""Like revlog.ancestors(), but supports additional options, includes
146 the given revs themselves, and returns a smartset
146 the given revs themselves, and returns a smartset
147
147
148 Scan ends at the stopdepth (exlusive) if specified. Revisions found
148 Scan ends at the stopdepth (exlusive) if specified. Revisions found
149 earlier than the startdepth are omitted.
149 earlier than the startdepth are omitted.
150
150
151 If cutfunc is provided, it will be used to cut the traversal of the DAG.
151 If cutfunc is provided, it will be used to cut the traversal of the DAG.
152 When cutfunc(X) returns True, the DAG traversal stops - revision X and
152 When cutfunc(X) returns True, the DAG traversal stops - revision X and
153 X's ancestors in the traversal path will be skipped. This could be an
153 X's ancestors in the traversal path will be skipped. This could be an
154 optimization sometimes.
154 optimization sometimes.
155
155
156 Note: if Y is an ancestor of X, cutfunc(X) returning True does not
156 Note: if Y is an ancestor of X, cutfunc(X) returning True does not
157 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
157 necessarily mean Y will also be cut. Usually cutfunc(Y) also wants to
158 return True in this case. For example,
158 return True in this case. For example,
159
159
160 D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
160 D # revancestors(repo, D, cutfunc=lambda rev: rev == B)
161 |\ # will include "A", because the path D -> C -> A was not cut.
161 |\ # will include "A", because the path D -> C -> A was not cut.
162 B C # If "B" gets cut, "A" might want to be cut too.
162 B C # If "B" gets cut, "A" might want to be cut too.
163 |/
163 |/
164 A
164 A
165 """
165 """
166 gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
166 gen = _genrevancestors(repo, revs, followfirst, startdepth, stopdepth,
167 cutfunc)
167 cutfunc)
168 return generatorset(gen, iterasc=False)
168 return generatorset(gen, iterasc=False)
169
169
170 def _genrevdescendants(repo, revs, followfirst):
170 def _genrevdescendants(repo, revs, followfirst):
171 if followfirst:
171 if followfirst:
172 cut = 1
172 cut = 1
173 else:
173 else:
174 cut = None
174 cut = None
175
175
176 cl = repo.changelog
176 cl = repo.changelog
177 first = revs.min()
177 first = revs.min()
178 nullrev = node.nullrev
178 nullrev = node.nullrev
179 if first == nullrev:
179 if first == nullrev:
180 # Are there nodes with a null first parent and a non-null
180 # Are there nodes with a null first parent and a non-null
181 # second one? Maybe. Do we care? Probably not.
181 # second one? Maybe. Do we care? Probably not.
182 yield first
182 yield first
183 for i in cl:
183 for i in cl:
184 yield i
184 yield i
185 else:
185 else:
186 seen = set(revs)
186 seen = set(revs)
187 for i in cl.revs(first):
187 for i in cl.revs(first):
188 if i in seen:
188 if i in seen:
189 yield i
189 yield i
190 continue
190 continue
191 for x in cl.parentrevs(i)[:cut]:
191 for x in cl.parentrevs(i)[:cut]:
192 if x != nullrev and x in seen:
192 if x != nullrev and x in seen:
193 seen.add(i)
193 seen.add(i)
194 yield i
194 yield i
195 break
195 break
196
196
197 def _builddescendantsmap(repo, startrev, followfirst):
197 def _builddescendantsmap(repo, startrev, followfirst):
198 """Build map of 'rev -> child revs', offset from startrev"""
198 """Build map of 'rev -> child revs', offset from startrev"""
199 cl = repo.changelog
199 cl = repo.changelog
200 nullrev = node.nullrev
200 nullrev = node.nullrev
201 descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
201 descmap = [[] for _rev in pycompat.xrange(startrev, len(cl))]
202 for currev in cl.revs(startrev + 1):
202 for currev in cl.revs(startrev + 1):
203 p1rev, p2rev = cl.parentrevs(currev)
203 p1rev, p2rev = cl.parentrevs(currev)
204 if p1rev >= startrev:
204 if p1rev >= startrev:
205 descmap[p1rev - startrev].append(currev)
205 descmap[p1rev - startrev].append(currev)
206 if not followfirst and p2rev != nullrev and p2rev >= startrev:
206 if not followfirst and p2rev != nullrev and p2rev >= startrev:
207 descmap[p2rev - startrev].append(currev)
207 descmap[p2rev - startrev].append(currev)
208 return descmap
208 return descmap
209
209
210 def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
210 def _genrevdescendantsofdepth(repo, revs, followfirst, startdepth, stopdepth):
211 startrev = revs.min()
211 startrev = revs.min()
212 descmap = _builddescendantsmap(repo, startrev, followfirst)
212 descmap = _builddescendantsmap(repo, startrev, followfirst)
213 def pfunc(rev):
213 def pfunc(rev):
214 return descmap[rev - startrev]
214 return descmap[rev - startrev]
215 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)
215 return _walkrevtree(pfunc, revs, startdepth, stopdepth, reverse=False)
216
216
217 def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
217 def revdescendants(repo, revs, followfirst, startdepth=None, stopdepth=None):
218 """Like revlog.descendants() but supports additional options, includes
218 """Like revlog.descendants() but supports additional options, includes
219 the given revs themselves, and returns a smartset
219 the given revs themselves, and returns a smartset
220
220
221 Scan ends at the stopdepth (exlusive) if specified. Revisions found
221 Scan ends at the stopdepth (exlusive) if specified. Revisions found
222 earlier than the startdepth are omitted.
222 earlier than the startdepth are omitted.
223 """
223 """
224 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
224 if startdepth is None and (stopdepth is None or stopdepth >= maxlogdepth):
225 gen = _genrevdescendants(repo, revs, followfirst)
225 gen = _genrevdescendants(repo, revs, followfirst)
226 else:
226 else:
227 gen = _genrevdescendantsofdepth(repo, revs, followfirst,
227 gen = _genrevdescendantsofdepth(repo, revs, followfirst,
228 startdepth, stopdepth)
228 startdepth, stopdepth)
229 return generatorset(gen, iterasc=True)
229 return generatorset(gen, iterasc=True)
230
230
231 def descendantrevs(revs, revsfn, parentrevsfn):
231 def descendantrevs(revs, revsfn, parentrevsfn):
232 """Generate revision number descendants in revision order.
232 """Generate revision number descendants in revision order.
233
233
234 Yields revision numbers starting with a child of some rev in
234 Yields revision numbers starting with a child of some rev in
235 ``revs``. Results are ordered by revision number and are
235 ``revs``. Results are ordered by revision number and are
236 therefore topological. Each revision is not considered a descendant
236 therefore topological. Each revision is not considered a descendant
237 of itself.
237 of itself.
238
238
239 ``revsfn`` is a callable that with no argument iterates over all
239 ``revsfn`` is a callable that with no argument iterates over all
240 revision numbers and with a ``start`` argument iterates over revision
240 revision numbers and with a ``start`` argument iterates over revision
241 numbers beginning with that value.
241 numbers beginning with that value.
242
242
243 ``parentrevsfn`` is a callable that receives a revision number and
243 ``parentrevsfn`` is a callable that receives a revision number and
244 returns an iterable of parent revision numbers, whose values may include
244 returns an iterable of parent revision numbers, whose values may include
245 nullrev.
245 nullrev.
246 """
246 """
247 first = min(revs)
247 first = min(revs)
248
248
249 if first == nullrev:
249 if first == nullrev:
250 for rev in revsfn():
250 for rev in revsfn():
251 yield rev
251 yield rev
252 return
252 return
253
253
254 seen = set(revs)
254 seen = set(revs)
255 for rev in revsfn(start=first + 1):
255 for rev in revsfn(start=first + 1):
256 for prev in parentrevsfn(rev):
256 for prev in parentrevsfn(rev):
257 if prev != nullrev and prev in seen:
257 if prev != nullrev and prev in seen:
258 seen.add(rev)
258 seen.add(rev)
259 yield rev
259 yield rev
260 break
260 break
261
261
-def _reachablerootspure(repo, minroot, roots, heads, includepath):
-    """See reachableroots"""
+def _reachablerootspure(pfunc, minroot, roots, heads, includepath):
+    """See revlog.reachableroots"""
    if not roots:
        return []
-    parentrevs = repo.changelog.parentrevs
    roots = set(roots)
    visit = list(heads)
    reachable = set()
    seen = {}
    # prefetch all the things! (because python is slow)
    reached = reachable.add
    dovisit = visit.append
    nextvisit = visit.pop
    # open-code the post-order traversal due to the tiny size of
    # sys.getrecursionlimit()
    while visit:
        rev = nextvisit()
        if rev in roots:
            reached(rev)
            if not includepath:
                continue
-        parents = parentrevs(rev)
+        parents = pfunc(rev)
        seen[rev] = parents
        for parent in parents:
            if parent >= minroot and parent not in seen:
                dovisit(parent)
    if not reachable:
        return baseset()
    if not includepath:
        return reachable
    for rev in sorted(seen):
        for parent in seen[rev]:
            if parent in reachable:
                reached(rev)
    return reachable

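# Illustrative sketch (not part of this changeset): _reachablerootspure()
# on a small DAG, using the ``pfunc`` signature introduced by this change,
# with a plain dict lookup standing in for changelog.parentrevs.
# Shape: 0 <- 1 <- 2, plus a second child 0 <- 3.
_toypfunc = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (0, -1)}.get
# with includepath, every revision on a root-to-head path is returned:
assert sorted(_reachablerootspure(_toypfunc, 0, [0], [2], True)) == [0, 1, 2]
# without it, only the reachable roots themselves:
assert set(_reachablerootspure(_toypfunc, 0, [0], [2], False)) == {0}
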
def reachableroots(repo, roots, heads, includepath=False):
-    """return (heads(::<roots> and <roots>::<heads>))
-
-    If includepath is True, return (<roots>::<heads>)."""
+    """See revlog.reachableroots"""
    if not roots:
        return baseset()
    minroot = roots.min()
    roots = list(roots)
    heads = list(heads)
-    try:
-        revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
-    except AttributeError:
-        revs = _reachablerootspure(repo, minroot, roots, heads, includepath)
+    revs = repo.changelog.reachableroots(minroot, heads, roots, includepath)
    revs = baseset(revs)
    revs.sort()
    return revs

def _changesrange(fctx1, fctx2, linerange2, diffopts):
    """Return `(diffinrange, linerange1)` where `diffinrange` is True
    if diff from fctx2 to fctx1 has changes in linerange2 and
    `linerange1` is the new line range for fctx1.
    """
    blocks = mdiff.allblocks(fctx1.data(), fctx2.data(), diffopts)
    filteredblocks, linerange1 = mdiff.blocksinrange(blocks, linerange2)
    diffinrange = any(stype == '!' for _, stype in filteredblocks)
    return diffinrange, linerange1

def blockancestors(fctx, fromline, toline, followfirst=False):
    """Yield ancestors of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    diffopts = patch.diffopts(fctx._repo.ui)
    fctx = fctx.introfilectx()
    visit = {(fctx.linkrev(), fctx.filenode()): (fctx, (fromline, toline))}
    while visit:
        c, linerange2 = visit.pop(max(visit))
        pl = c.parents()
        if followfirst:
            pl = pl[:1]
        if not pl:
            # The block originates from the initial revision.
            yield c, linerange2
            continue
        inrange = False
        for p in pl:
            inrangep, linerange1 = _changesrange(p, c, linerange2, diffopts)
            inrange = inrange or inrangep
            if linerange1[0] == linerange1[1]:
                # Parent's linerange is empty, meaning that the block got
                # introduced in this revision; no need to go further in this
                # branch.
                continue
            # Set _descendantrev with 'c' (a known descendant) so that, when
            # _adjustlinkrev is called for 'p', it receives this descendant
            # (as srcrev) instead of the possibly-topmost introrev.
            p._descendantrev = c.rev()
            visit[p.linkrev(), p.filenode()] = p, linerange1
        if inrange:
            yield c, linerange2

def blockdescendants(fctx, fromline, toline):
    """Yield descendants of `fctx` with respect to the block of lines within
    `fromline`-`toline` range.
    """
    # First possibly yield 'fctx' if it has changes in range with respect to
    # its parents.
    try:
        c, linerange1 = next(blockancestors(fctx, fromline, toline))
    except StopIteration:
        pass
    else:
        if c == fctx:
            yield c, linerange1

    diffopts = patch.diffopts(fctx._repo.ui)
    fl = fctx.filelog()
    seen = {fctx.filerev(): (fctx, (fromline, toline))}
    for i in fl.descendants([fctx.filerev()]):
        c = fctx.filectx(i)
        inrange = False
        for x in fl.parentrevs(i):
            try:
                p, linerange2 = seen[x]
            except KeyError:
                # nullrev or other branch
                continue
            inrangep, linerange1 = _changesrange(c, p, linerange2, diffopts)
            inrange = inrange or inrangep
            # If revision 'i' has been seen (it's a merge) and the line range
            # previously computed differs from the one we just got, we take
            # the surrounding interval. This is conservative but avoids losing
            # information.
            if i in seen and seen[i][1] != linerange1:
                lbs, ubs = zip(linerange1, seen[i][1])
                linerange1 = min(lbs), max(ubs)
            seen[i] = c, linerange1
        if inrange:
            yield c, linerange1

@attr.s(slots=True, frozen=True)
class annotateline(object):
    fctx = attr.ib()
    lineno = attr.ib()
    # Whether this annotation was the result of a skip-annotate.
    skip = attr.ib(default=False)
    text = attr.ib(default=None)

@attr.s(slots=True, frozen=True)
class _annotatedfile(object):
    # list indexed by lineno - 1
    fctxs = attr.ib()
    linenos = attr.ib()
    skips = attr.ib()
    # full file content
    text = attr.ib()

def _countlines(text):
    if text.endswith("\n"):
        return text.count("\n")
    return text.count("\n") + int(bool(text))

def _decoratelines(text, fctx):
    n = _countlines(text)
    linenos = pycompat.rangelist(1, n + 1)
    return _annotatedfile([fctx] * n, linenos, [False] * n, text)

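# Quick illustration of the line-counting rule above (a sketch, not part
# of this changeset): a trailing newline does not open an extra line, but
# a non-empty final fragment without one counts as a line.
assert _countlines("") == 0
assert _countlines("a\nb\n") == 2
assert _countlines("a\nb") == 2
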
def _annotatepair(parents, childfctx, child, skipchild, diffopts):
    r'''
    Given parent and child fctxes and annotate data for parents, for all lines
    in either parent that match the child, annotate the child with the parent's
    data.

    Additionally, if `skipchild` is True, replace all other lines with parent
    annotate data as well such that child is never blamed for any lines.

    See test-annotate.py for unit tests.
    '''
    pblocks = [(parent, mdiff.allblocks(parent.text, child.text, opts=diffopts))
               for parent in parents]

    if skipchild:
        # Need to iterate over the blocks twice -- make it a list
        pblocks = [(p, list(blocks)) for (p, blocks) in pblocks]
    # Mercurial currently prefers p2 over p1 for annotate.
    # TODO: change this?
    for parent, blocks in pblocks:
        for (a1, a2, b1, b2), t in blocks:
            # Changed blocks ('!') or blocks made only of blank lines ('~')
            # belong to the child.
            if t == '=':
                child.fctxs[b1:b2] = parent.fctxs[a1:a2]
                child.linenos[b1:b2] = parent.linenos[a1:a2]
                child.skips[b1:b2] = parent.skips[a1:a2]

    if skipchild:
        # Now try and match up anything that couldn't be matched.
        # Reversing pblocks maintains bias towards p2, matching the behavior
        # above.
        pblocks.reverse()

        # The heuristics are:
        # * Work on blocks of changed lines (effectively diff hunks with -U0).
        #   This could potentially be smarter but works well enough.
        # * For a non-matching section, do a best-effort fit. Match lines in
        #   diff hunks 1:1, dropping lines as necessary.
        # * Repeat the last line as a last resort.

        # First, replace as much as possible without repeating the last line.
        remaining = [(parent, []) for parent, _blocks in pblocks]
        for idx, (parent, blocks) in enumerate(pblocks):
            for (a1, a2, b1, b2), _t in blocks:
                if a2 - a1 >= b2 - b1:
                    for bk in pycompat.xrange(b1, b2):
                        if child.fctxs[bk] == childfctx:
                            ak = min(a1 + (bk - b1), a2 - 1)
                            child.fctxs[bk] = parent.fctxs[ak]
                            child.linenos[bk] = parent.linenos[ak]
                            child.skips[bk] = True
                else:
                    remaining[idx][1].append((a1, a2, b1, b2))

        # Then, look at anything left, which might involve repeating the last
        # line.
        for parent, blocks in remaining:
            for a1, a2, b1, b2 in blocks:
                for bk in pycompat.xrange(b1, b2):
                    if child.fctxs[bk] == childfctx:
                        ak = min(a1 + (bk - b1), a2 - 1)
                        child.fctxs[bk] = parent.fctxs[ak]
                        child.linenos[bk] = parent.linenos[ak]
                        child.skips[bk] = True
    return child

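# Sketch (not part of this changeset): annotating a one-line edit. Plain
# strings stand in for fctx objects, which is all _annotatepair() needs.
_old = _decoratelines("a\nb\n", "rev0")
_new = _decoratelines("a\nc\n", "rev1")
_res = _annotatepair([_old], "rev1", _new, False, diffopts=None)
# line 1 ("a") is unchanged and stays blamed on rev0; line 2 is new:
assert _res.fctxs == ["rev0", "rev1"]
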
def annotate(base, parents, skiprevs=None, diffopts=None):
    """Core algorithm for filectx.annotate()

    `parents(fctx)` is a function returning a list of parent filectxs.
    """

    # This algorithm would prefer to be recursive, but Python is a
    # bit recursion-hostile. Instead we do an iterative
    # depth-first search.

    # 1st DFS pre-calculates pcache and needed
    visit = [base]
    pcache = {}
    needed = {base: 1}
    while visit:
        f = visit.pop()
        if f in pcache:
            continue
        pl = parents(f)
        pcache[f] = pl
        for p in pl:
            needed[p] = needed.get(p, 0) + 1
            if p not in pcache:
                visit.append(p)

    # 2nd DFS does the actual annotate
    visit[:] = [base]
    hist = {}
    while visit:
        f = visit[-1]
        if f in hist:
            visit.pop()
            continue

        ready = True
        pl = pcache[f]
        for p in pl:
            if p not in hist:
                ready = False
                visit.append(p)
        if ready:
            visit.pop()
            curr = _decoratelines(f.data(), f)
            skipchild = False
            if skiprevs is not None:
                skipchild = f._changeid in skiprevs
            curr = _annotatepair([hist[p] for p in pl], f, curr, skipchild,
                                 diffopts)
            for p in pl:
                if needed[p] == 1:
                    del hist[p]
                    del needed[p]
                else:
                    needed[p] -= 1

            hist[f] = curr
            del pcache[f]

    a = hist[base]
    return [annotateline(*r) for r in zip(a.fctxs, a.linenos, a.skips,
                                          mdiff.splitnewlines(a.text))]

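# Sketch (not part of this changeset): driving annotate() with a minimal
# stand-in for filectx; only .data() and a parents callable are needed
# when skiprevs is None.
class _stubfctx(object):
    def __init__(self, data, parents):
        self._data, self._parents = data, parents
    def data(self):
        return self._data

_stubroot = _stubfctx("a\n", [])
_stubtip = _stubfctx("a\nb\n", [_stubroot])
_annotated = annotate(_stubtip, parents=lambda f: f._parents)
# line 1 is blamed on the root, line 2 on the tip:
assert [l.fctx is _stubroot for l in _annotated] == [True, False]
assert [l.lineno for l in _annotated] == [1, 2]
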
def toposort(revs, parentsfunc, firstbranch=()):
    """Yield revisions from heads to roots one (topo) branch at a time.

    This function aims to be used by a graph generator that wishes to minimize
    the number of parallel branches and their interleaving.

    Example iteration order (numbers show the "true" order in a changelog):

      o  4
      |
      o  1
      |
      | o  3
      | |
      | o  2
      |/
      o  0

    Note that the ancestors of merges are understood by the current
    algorithm to be on the same branch. This means no reordering will
    occur behind a merge.
    """

    ### Quick summary of the algorithm
    #
    # This function is based around a "retention" principle. We keep revisions
    # in memory until we are ready to emit a whole branch that immediately
    # "merges" into an existing one. This reduces the number of parallel
    # branches with interleaved revisions.
    #
    # During iteration revs are split into two groups:
    # A) revisions already emitted
    # B) revisions in "retention". They are stored as different subgroups.
    #
    # for each REV, we do the following logic:
    #
    #   1) if REV is a parent of (A), we will emit it. If there is a
    #   retention group ((B) above) that is blocked on REV being
    #   available, we emit all the revisions out of that retention
    #   group first.
    #
    #   2) else, we'll search for a subgroup in (B) awaiting REV to become
    #   available; if such a subgroup exists, we add REV to it and the
    #   subgroup is now awaiting REV.parents() to become available.
    #
    #   3) finally if no such group existed in (B), we create a new subgroup.
    #
    #
    # To bootstrap the algorithm, we emit the tipmost revision (which
    # puts it in group (A) from above).

    revs.sort(reverse=True)

    # Set of parents of revisions that have been emitted. They can be
    # considered unblocked as the graph generator is already aware of them so
    # there is no need to delay the revisions that reference them.
    #
    # If someone wants to prioritize a branch over the others, pre-filling
    # this set will force all other branches to wait until this branch is
    # ready to be emitted.
    unblocked = set(firstbranch)

    # list of groups waiting to be displayed, each group is defined by:
    #
    #   (revs:    lists of revs waiting to be displayed,
    #    blocked: set of revs that cannot be displayed before those in 'revs')
    #
    # The second value ('blocked') corresponds to parents of any revision in
    # the group ('revs') that is not itself contained in the group. The main
    # idea of this algorithm is to delay as much as possible the emission of
    # any revision. This means waiting for the moment we are about to display
    # these parents to display the revs in a group.
    #
    # This first implementation is smart until it encounters a merge: it will
    # emit revs as soon as any parent is about to be emitted and can grow an
    # arbitrary number of revs in 'blocked'. In practice this means we
    # properly retain new branches but give up on any special ordering for
    # ancestors of merges. The implementation can be improved to handle this
    # better.
    #
    # The first subgroup is special. It corresponds to all the revisions that
    # were already emitted. The 'revs' list is expected to be empty and the
    # 'blocked' set contains the parent revisions of already emitted
    # revisions.
    #
    # You could pre-seed the <parents> set of groups[0] to specific
    # changesets to select what the first emitted branch should be.
    groups = [([], unblocked)]
    pendingheap = []
    pendingset = set()

    heapq.heapify(pendingheap)
    heappop = heapq.heappop
    heappush = heapq.heappush
    for currentrev in revs:
        # Heap works with smallest element, we want highest so we invert
        if currentrev not in pendingset:
            heappush(pendingheap, -currentrev)
            pendingset.add(currentrev)
        # iterate on pending revs until the current rev has been processed.
        rev = None
        while rev != currentrev:
            rev = -heappop(pendingheap)
            pendingset.remove(rev)

            # Seek for a subgroup blocked, waiting for the current revision.
            matching = [i for i, g in enumerate(groups) if rev in g[1]]

            if matching:
                # The main idea is to gather together all sets that are
                # blocked on the same revision.
                #
                # Groups are merged when a common blocking ancestor is
                # observed. For example, given two groups:
                #
                #   revs [5, 4] waiting for 1
                #   revs [3, 2] waiting for 1
                #
                # These two groups will be merged when we process
                # 1. In theory, we could have merged the groups when
                # we added 2 to the group it is now in (we could have
                # noticed the groups were both blocked on 1 then), but
                # the way it works now makes the algorithm simpler.
                #
                # We also always keep the oldest subgroup first. We can
                # probably improve the behavior by having the longest set
                # first. That way, graph algorithms could minimise the length
                # of parallel lines in their drawing. This is currently not
                # done.
                targetidx = matching.pop(0)
                trevs, tparents = groups[targetidx]
                for i in matching:
                    gr = groups[i]
                    trevs.extend(gr[0])
                    tparents |= gr[1]
                # delete all merged subgroups (except the one we kept)
                # (starting from the last subgroup for performance and
                # sanity reasons)
                for i in reversed(matching):
                    del groups[i]
            else:
                # This is a new head. We create a new subgroup for it.
                targetidx = len(groups)
                groups.append(([], {rev}))

            gr = groups[targetidx]

            # We now add the current nodes to this subgroup. This is done
            # after the subgroup merging because all elements from a subgroup
            # that relied on this rev must precede it.
            #
            # we also update the <parents> set to include the parents of the
            # new nodes.
            if rev == currentrev: # only display stuff in rev
                gr[0].append(rev)
            gr[1].remove(rev)
            parents = [p for p in parentsfunc(rev) if p > node.nullrev]
            gr[1].update(parents)
            for p in parents:
                if p not in pendingset:
                    pendingset.add(p)
                    heappush(pendingheap, -p)

            # Look for a subgroup to display
            #
            # When unblocked is empty (if clause), we were not waiting for any
            # revisions during the first iteration (if no priority was given)
            # or if we emitted a whole disconnected set of the graph (reached
            # a root). In that case we arbitrarily take the oldest known
            # subgroup. The heuristic could probably be better.
            #
            # Otherwise (elif clause) if the subgroup is blocked on
            # a revision we just emitted, we can safely emit it as
            # well.
            if not unblocked:
                if len(groups) > 1: # display other subset
                    targetidx = 1
                    gr = groups[1]
            elif not gr[1] & unblocked:
                gr = None

            if gr is not None:
                # update the set of awaited revisions with the one from the
                # subgroup
                unblocked |= gr[1]
                # output all revisions in the subgroup
                for r in gr[0]:
                    yield r
                # delete the subgroup that you just output
                # unless it is groups[0] in which case you just empty it.
                if targetidx:
                    del groups[targetidx]
                else:
                    gr[0][:] = []
    # Check if we have some subgroup waiting for revisions we are not going
    # to iterate over
    for g in groups:
        for r in g[0]:
            yield r

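# Sketch (not part of this changeset): toposort() on the example graph
# from the docstring above; one branch comes out at a time, children
# before parents.
_toygraph = {0: [-1], 1: [0], 2: [0], 3: [2], 4: [1]}
assert list(toposort([0, 1, 2, 3, 4], _toygraph.__getitem__)) == [4, 1, 3, 2, 0]
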
def headrevs(revs, parentsfn):
    """Resolve the set of heads from a set of revisions.

    Receives an iterable of revision numbers and a callable that receives a
    revision number and returns an iterable of parent revision numbers,
    possibly including nullrev.

    Returns a set of revision numbers that are DAG heads within the passed
    subset.

    ``nullrev`` is never included in the returned set, even if it is provided
    in the input set.
    """
    headrevs = set(revs)
    parents = {node.nullrev}
    up = parents.update

    for rev in revs:
        up(parentsfn(rev))
    headrevs.difference_update(parents)
    return headrevs

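# Sketch (not part of this changeset): with the fork 0 <- 1 <- 2 and
# 1 <- 3, both leaves are heads; shrinking the subset exposes interior
# revisions as heads.
_forkparents = {0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}
assert headrevs([0, 1, 2, 3], _forkparents.__getitem__) == {2, 3}
assert headrevs([0, 1], _forkparents.__getitem__) == {1}
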
def headrevssubset(revsfn, parentrevsfn, startrev=None, stoprevs=None):
    """Returns the set of all revs that have no children, with some control
    over the traversal.

    ``revsfn`` is a callable that with no arguments returns an iterator over
    all revision numbers in topological order. With a ``start`` argument, it
    returns revision numbers starting at that number.

    ``parentrevsfn`` is a callable receiving a revision number and returns an
    iterable of parent revision numbers, where values can include nullrev.

    ``startrev`` is a revision number at which to start the search.

    ``stoprevs`` is an iterable of revision numbers that, when encountered,
    will stop DAG traversal beyond them. Parents of revisions in this
    collection will be heads.
    """
    if startrev is None:
        startrev = nullrev

    stoprevs = set(stoprevs or [])

    reachable = {startrev}
    heads = {startrev}

    for rev in revsfn(start=startrev + 1):
        for prev in parentrevsfn(rev):
            if prev in reachable:
                if rev not in stoprevs:
                    reachable.add(rev)
                    heads.add(rev)

                if prev in heads and prev not in stoprevs:
                    heads.remove(prev)

    return heads

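# Sketch (not part of this changeset): on the graph 0 <- 1 <- 2 with a
# second branch 0 <- 3, a full scan finds both leaves, while starting the
# search at rev 1 never reaches the branch that forked off below it.
_subsetparents = {0: [-1], 1: [0], 2: [1], 3: [0]}
def _subsetrevsfn(start=0):
    return iter(range(start, 4))
def _subsetparentrevsfn(rev):
    return _subsetparents[rev]
assert headrevssubset(_subsetrevsfn, _subsetparentrevsfn) == {2, 3}
assert headrevssubset(_subsetrevsfn, _subsetparentrevsfn, startrev=1) == {2}
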
def linearize(revs, parentsfn):
    """Linearize and topologically sort a list of revisions.

    The linearization process tries to create long runs of revs where a child
    rev comes immediately after its first parent. This is done by visiting the
    heads of the revs in inverse topological order, and for each visited rev,
    visiting its second parent, then its first parent, then adding the rev
    itself to the output list.

    Returns a list of revision numbers.
    """
    visit = list(sorted(headrevs(revs, parentsfn), reverse=True))
    finished = set()
    result = []

    while visit:
        rev = visit.pop()
        if rev < 0:
            rev = -rev - 1
            if rev not in finished:
                result.append(rev)
                finished.add(rev)

        else:
            visit.append(-rev - 1)

            for prev in parentsfn(rev):
                if prev == node.nullrev or prev not in revs or prev in finished:
                    continue

                visit.append(prev)

    assert len(result) == len(revs)

    return result
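
# Sketch (not part of this changeset): linearizing a small merge. Rev 3
# merges 1 (first parent) and 2 (second parent); both fork from 0. Note
# the run "1, 3": a child directly follows its first parent.
_mergegraph = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (1, 2)}
assert linearize({0, 1, 2, 3}, _mergegraph.__getitem__) == [0, 2, 1, 3]
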
@@ -1,2676 +1,2687 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import collections
import contextlib
import errno
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullhex,
    nullid,
    nullrev,
    short,
    wdirfilenodeids,
    wdirhex,
    wdirid,
    wdirrev,
)
from .i18n import _
from .revlogutils.constants import (
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_ISCENSORED,
    REVIDX_KNOWN_FLAGS,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
)
from .thirdparty import (
    attr,
)
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    repository,
    templatefilters,
    util,
)
from .revlogutils import (
    deltas as deltautil,
)
from .utils import (
    interfaceutil,
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.
REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_KNOWN_FLAGS
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod(r'parsers')
try:
    from . import rustext
    rustext.__name__  # force actual import (see hgdemandimport)
except ImportError:
    rustext = None

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Store flag processors (cf. 'addflagprocessor()' to register)
_flagprocessors = {
    REVIDX_ISCENSORED: None,
}

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False

def ellipsiswriteprocessor(rl, text):
    return text, False

def ellipsisrawprocessor(rl, text):
    return False

ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)

def addflagprocessor(flag, processor):
    """Register a flag processor on a revision data flag.

    Invariant:
    - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
      and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
    - Only one flag processor can be registered on a specific flag.
    - flagprocessors must be 3-tuples of functions (read, write, raw) with the
      following signatures:
          - (read)  f(self, rawtext) -> text, bool
          - (write) f(self, text) -> rawtext, bool
          - (raw)   f(self, rawtext) -> bool
      "text" is presented to the user. "rawtext" is stored in revlog data, not
      directly visible to the user.
      The boolean returned by these transforms is used to determine whether
      the returned text can be used for hash integrity checking. For example,
      if "write" returns False, then "text" is used to generate hash. If
      "write" returns True, that basically means "rawtext" returned by "write"
      should be used to generate hash. Usually, "write" and "read" return
      different booleans. And "raw" returns the same boolean as "write".

    Note: The 'raw' transform is used for changegroup generation and in some
    debug commands. In this case the transform only indicates whether the
    contents can be used for hash integrity checks.
    """
    _insertflagprocessor(flag, processor, _flagprocessors)

def _insertflagprocessor(flag, processor, flagprocessors):
    if not flag & REVIDX_KNOWN_FLAGS:
        msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
        raise error.ProgrammingError(msg)
    if flag not in REVIDX_FLAGS_ORDER:
        msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
        raise error.ProgrammingError(msg)
    if flag in flagprocessors:
        msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
        raise error.Abort(msg)
    flagprocessors[flag] = processor

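# Sketch (not part of this changeset): the shape of a flag processor. The
# identity transforms below are assumptions for illustration; a real
# processor (e.g. the lfs extension, on REVIDX_EXTSTORED) would actually
# encode and decode the rawtext.
_identityprocessor = (
    lambda rl, rawtext: (rawtext, True),  # read: rawtext -> (text, validhash)
    lambda rl, text: (text, True),        # write: text -> (rawtext, validhash)
    lambda rl, rawtext: True,             # raw: rawtext usable for hashing
)
# registering it would then be:
#   addflagprocessor(REVIDX_EXTSTORED, _identityprocessor)
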
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    if (type & ~REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError('unknown revlog index flags')
    return int(int(offset) << 16 | type)

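# Sketch (not part of this changeset): the offset/flags packing
# round-trips; the low 16 bits carry the flags, the rest the offset.
_packed = offset_type(1048576, REVIDX_ISCENSORED)
assert getoffset(_packed) == 1048576
assert gettype(_packed) == REVIDX_ISCENSORED
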
@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """
    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()

@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)

@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)

# index v0:
#  4 bytes: offset
#  4 bytes: compressed length
#  4 bytes: base rev
#  4 bytes: link rev
# 20 bytes: parent 1 nodeid
# 20 bytes: parent 2 nodeid
# 20 bytes: nodeid
indexformatv0 = struct.Struct(">4l20s20s20s")
indexformatv0_pack = indexformatv0.pack
indexformatv0_unpack = indexformatv0.unpack

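# Sketch (not part of this changeset): packing one v0 entry by hand,
# four signed 32-bit ints followed by three 20-byte node fields, 76 bytes
# in total. The field values are made up for illustration.
assert indexformatv0.size == 76
_v0entry = indexformatv0_pack(0, 11, 0, 0, nullid, nullid, b'\x01' * 20)
assert indexformatv0_unpack(_v0entry)[4] == nullid  # parent 1 round-trips
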
class revlogoldindex(list):
    def __getitem__(self, i):
        if i == -1:
            return (0, 0, 0, -1, -1, -1, -1, nullid)
        return list.__getitem__(self, i)

class revlogoldio(object):
    def __init__(self):
        self.size = indexformatv0.size

    def parseindex(self, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = indexformatv0_unpack(cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return revlogoldindex(index), nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise error.RevlogError(_('index entry flags need revlog '
                                      'version 1'))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return indexformatv0_pack(*e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = struct.Struct(">Qiiiiii20s12x")
indexformatng_pack = indexformatng.pack
versionformat = struct.Struct(">I")
versionformat_pack = versionformat.pack
versionformat_unpack = versionformat.unpack

# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7fffffff

class revlogio(object):
    def __init__(self):
        self.size = indexformatng.size

    def parseindex(self, data, inline):
        # call the C implementation to parse the index data
        index, cache = parsers.parse_index2(data, inline)
        return index, getattr(index, 'nodemap', None), cache

    def packentry(self, entry, node, version, rev):
        p = indexformatng_pack(*entry)
        if rev == 0:
            p = versionformat_pack(version) + p[4:]
        return p

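# Sketch (not part of this changeset): there is no separate index header.
# For rev 0, packentry() overwrites the first 4 bytes of the entry (the
# high bytes of the offset, which are always zero for the first revision)
# with the version/flags word. The entry values here are made up.
_io = revlogio()
_entry0 = (offset_type(0, 0), 10, 10, -1, 0, -1, -1, nullid)
_packed0 = _io.packentry(_entry0, None, REVLOG_DEFAULT_VERSION, rev=0)
assert _packed0[:4] == versionformat_pack(REVLOG_DEFAULT_VERSION)
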
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.
    """
    def __init__(self, opener, indexfile, datafile=None, checkambig=False,
                 mmaplargeindex=False, censorable=False):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = datafile or (indexfile[:-2] + ".d")
        self.opener = opener
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
354 self._checkambig = checkambig
355 self._mmaplargeindex = mmaplargeindex
355 self._mmaplargeindex = mmaplargeindex
356 self._censorable = censorable
356 self._censorable = censorable
357 # 3-tuple of (node, rev, text) for a raw revision.
357 # 3-tuple of (node, rev, text) for a raw revision.
358 self._revisioncache = None
358 self._revisioncache = None
359 # Maps rev to chain base rev.
359 # Maps rev to chain base rev.
360 self._chainbasecache = util.lrucachedict(100)
360 self._chainbasecache = util.lrucachedict(100)
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
361 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
362 self._chunkcache = (0, '')
362 self._chunkcache = (0, '')
363 # How much data to read and cache into the raw revlog data cache.
363 # How much data to read and cache into the raw revlog data cache.
364 self._chunkcachesize = 65536
364 self._chunkcachesize = 65536
365 self._maxchainlen = None
365 self._maxchainlen = None
366 self._deltabothparents = True
366 self._deltabothparents = True
367 self.index = []
367 self.index = []
368 # Mapping of partial identifiers to full nodes.
368 # Mapping of partial identifiers to full nodes.
369 self._pcache = {}
369 self._pcache = {}
370 # Mapping of revision integer to full node.
370 # Mapping of revision integer to full node.
371 self._nodecache = {nullid: nullrev}
371 self._nodecache = {nullid: nullrev}
372 self._nodepos = None
372 self._nodepos = None
373 self._compengine = 'zlib'
373 self._compengine = 'zlib'
374 self._compengineopts = {}
374 self._compengineopts = {}
375 self._maxdeltachainspan = -1
375 self._maxdeltachainspan = -1
376 self._withsparseread = False
376 self._withsparseread = False
377 self._sparserevlog = False
377 self._sparserevlog = False
378 self._srdensitythreshold = 0.50
378 self._srdensitythreshold = 0.50
379 self._srmingapsize = 262144
379 self._srmingapsize = 262144
380
380
381 # Make copy of flag processors so each revlog instance can support
381 # Make copy of flag processors so each revlog instance can support
382 # custom flags.
382 # custom flags.
383 self._flagprocessors = dict(_flagprocessors)
383 self._flagprocessors = dict(_flagprocessors)
384
384
385 # 2-tuple of file handles being used for active writing.
385 # 2-tuple of file handles being used for active writing.
386 self._writinghandles = None
386 self._writinghandles = None
387
387
388 self._loadindex()
388 self._loadindex()

    def _loadindex(self):
        mmapindexthreshold = None
        opts = getattr(self.opener, 'options', {}) or {}

        if 'revlogv2' in opts:
            newversionflags = REVLOGV2 | FLAG_INLINE_DATA
        elif 'revlogv1' in opts:
            newversionflags = REVLOGV1 | FLAG_INLINE_DATA
            if 'generaldelta' in opts:
                newversionflags |= FLAG_GENERALDELTA
        elif getattr(self.opener, 'options', None) is not None:
            # If options provided but no 'revlog*' found, the repository
            # would have no 'requires' file in it, which means we have to
            # stick to the old format.
            newversionflags = REVLOGV0
        else:
            newversionflags = REVLOG_DEFAULT_VERSION

        if 'chunkcachesize' in opts:
            self._chunkcachesize = opts['chunkcachesize']
        if 'maxchainlen' in opts:
            self._maxchainlen = opts['maxchainlen']
        if 'deltabothparents' in opts:
            self._deltabothparents = opts['deltabothparents']
        self._lazydelta = bool(opts.get('lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get('lazydeltabase', False))
        if 'compengine' in opts:
            self._compengine = opts['compengine']
        if 'zlib.level' in opts:
            self._compengineopts['zlib.level'] = opts['zlib.level']
        if 'zstd.level' in opts:
            self._compengineopts['zstd.level'] = opts['zstd.level']
        if 'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts['maxdeltachainspan']
        if self._mmaplargeindex and 'mmapindexthreshold' in opts:
            mmapindexthreshold = opts['mmapindexthreshold']
        self._sparserevlog = bool(opts.get('sparse-revlog', False))
        withsparseread = bool(opts.get('with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if 'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts['sparse-read-density-threshold']
        if 'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts['sparse-read-min-gap-size']
        if opts.get('enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
            _insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(_('revlog chunk cache size %r is not '
                                      'greater than 0') % self._chunkcachesize)
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(_('revlog chunk cache size %r is not a '
                                      'power of 2') % self._chunkcachesize)
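
        # Illustrative aside (not in the original file): the bit trick above
        # works because a power of two has exactly one bit set, so
        # n & (n - 1) clears it to zero; any other positive n keeps a bit.
        # >>> [n for n in (1, 2, 3, 4, 6, 8, 65536) if n & (n - 1) == 0]
        # [1, 2, 4, 8, 65536]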

        indexdata = ''
        self._initempty = True
        try:
            with self._indexfp() as f:
                if (mmapindexthreshold is not None and
                    self.opener.fstat(f).st_size >= mmapindexthreshold):
                    # TODO: should .close() to release resources without
                    # relying on Python GC
                    indexdata = util.buffer(util.mmapread(f))
                else:
                    indexdata = f.read()
            if len(indexdata) > 0:
                versionflags = versionformat_unpack(indexdata[:4])[0]
                self._initempty = False
            else:
                versionflags = newversionflags
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise

            versionflags = newversionflags

        self.version = versionflags

        flags = versionflags & ~0xFFFF
        fmt = versionflags & 0xFFFF

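        # Illustrative aside (not in the original file): the low 16 bits of
        # the header dword carry the format version, the high bits the flags.
        # >>> vf = (1 << 16) | 1      # e.g. an inline-data flag plus v1
        # >>> vf & 0xFFFF             # format
        # 1
        # >>> hex(vf & ~0xFFFF)       # flags
        # '0x10000'
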
        if fmt == REVLOGV0:
            if flags:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = False
            self._generaldelta = False

        elif fmt == REVLOGV1:
            if flags & ~REVLOGV1_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            self._generaldelta = versionflags & FLAG_GENERALDELTA

        elif fmt == REVLOGV2:
            if flags & ~REVLOGV2_FLAGS:
                raise error.RevlogError(_('unknown flags (%#04x) in version %d '
                                          'revlog %s') %
                                        (flags >> 16, fmt, self.indexfile))

            self._inline = versionflags & FLAG_INLINE_DATA
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True

        else:
            raise error.RevlogError(_('unknown version (%d) in revlog %s') %
                                    (fmt, self.indexfile))
        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        try:
            d = self._io.parseindex(indexdata, self._inline)
        except (ValueError, IndexError):
            raise error.RevlogError(_("index %s is corrupted") %
                                    self.indexfile)
        self.index, nodemap, self._chunkcache = d
        if nodemap is not None:
            self.nodemap = self._nodecache = nodemap
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = {}
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    def _indexfp(self, mode='r'):
        """file object for the revlog's index file"""
        args = {r'mode': mode}
        if mode != 'r':
            args[r'checkambig'] = self._checkambig
        if mode == 'w':
            args[r'atomictemp'] = True
        return self.opener(self.indexfile, **args)

    def _datafp(self, mode='r'):
        """file object for the revlog's data file"""
        return self.opener(self.datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tip(self):
        return self.node(len(self.index) - 1)
    def __contains__(self, rev):
        return 0 <= rev < len(self)
    def __len__(self):
        return len(self.index)
    def __iter__(self):
        return iter(pycompat.xrange(len(self)))
    def revs(self, start=0, stop=None):
        """iterate over all revs in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @util.propertycache
    def nodemap(self):
        if self.index:
            # populate mapping down to the initial node
            node0 = self.index[0][7] # get around changelog filtering
            self.rev(node0)
        return self._nodecache

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False
        return True

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, '')
        self._pcache = {}

        try:
            # If we are using the native C version, you are in a fun case
            # where self.index, self.nodemap and self._nodecache are the same
            # object.
            self._nodecache.clearcaches()
        except AttributeError:
            self._nodecache = {nullid: nullrev}
            self._nodepos = None

    def rev(self, node):
        try:
            return self._nodecache[node]
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))
        except KeyError:
            # pure python cache lookup failed
            n = self._nodecache
            i = self.index
            p = self._nodepos
            if p is None:
                p = len(i) - 1
            else:
                assert p < len(i)
            for r in pycompat.xrange(p, -1, -1):
                v = i[r][7]
                n[v] = r
                if v == node:
                    self._nodepos = r - 1
                    return r
            if node == wdirid or node in wdirfilenodeids:
                raise error.WdirUnsupported
            raise error.LookupError(node, self.indexfile, _('no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]
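
    # Illustrative aside (not in the original file): decoding the packed
    # first tuple entry, mirroring start() and flags() above.
    # >>> packed = (1234 << 16) | 0x1    # offset 1234, flags 0x1
    # >>> int(packed >> 16)
    # 1234
    # >>> packed & 0xFFFF
    # 1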

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(rev, raw=True)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

        return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
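
    # Illustrative aside (not in the original file): a v1 index entry as an
    # 8-tuple, matching the accessors above (field values are made up).
    # >>> e = ((0 << 16) | 0, 11, 24, 0, 5, 3, -1, b'\x00' * 20)
    # >>> e[5], e[6]                  # what parentrevs() returns
    # (3, -1)
    # >>> e[7] == b'\x00' * 20        # what node() returns
    # True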

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped
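
    # Illustrative aside (not in the original file): with generaldelta each
    # entry's base field (e[3]) names its delta parent directly, so the chain
    # may jump around the revlog; without it the chain is implicitly
    # rev, rev - 1, ..., base, i.e. a contiguous run ending at the base.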

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustext is not None:
            lazyancestors = rustext.ancestor.LazyAncestors
            arg = self.index
        elif util.safehasattr(parsers, 'rustlazyancestors'):
            lazyancestors = ancestor.rustlazyancestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustext is not None:
            return rustext.ancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev: # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in heads.iteritems() if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustext is not None:
            return rustext.dagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iter over filtered rev so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1 # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
        return [r for r, val in enumerate(ishead) if val]
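
    # Illustrative aside (not in the original file): for a linear history
    # 0 <- 1 <- 2, each rev marks itself then clears its parents; the extra
    # slot at the end absorbs writes for nullrev parents (index -1).
    # >>> ishead = [1, 1, 1, 0]
    # >>> for p1, p2 in ((-1, -1), (0, -1), (1, -1)):
    # ...     ishead[p1] = ishead[p2] = 0
    # >>> [r for r, val in enumerate(ishead) if val]
    # [2]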

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = set(self.rev(n) for n in stop or [])

        revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
                                    stoprevs=stoprevs)

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError): # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs
1205
1205
1206 def isancestor(self, a, b):
1206 def isancestor(self, a, b):
1207 """return True if node a is an ancestor of node b
1207 """return True if node a is an ancestor of node b
1208
1208
1209 A revision is considered an ancestor of itself."""
1209 A revision is considered an ancestor of itself."""
1210 a, b = self.rev(a), self.rev(b)
1210 a, b = self.rev(a), self.rev(b)
1211 return self.isancestorrev(a, b)
1211 return self.isancestorrev(a, b)
1212
1212
1213 def isancestorrev(self, a, b):
1213 def isancestorrev(self, a, b):
1214 """return True if revision a is an ancestor of revision b
1214 """return True if revision a is an ancestor of revision b
1215
1215
1216 A revision is considered an ancestor of itself.
1216 A revision is considered an ancestor of itself.
1217
1217
1218 The implementation of this is trivial but the use of
1218 The implementation of this is trivial but the use of
1219 commonancestorsheads is not."""
1219 reachableroots is not."""
1220 if a == nullrev:
1220 if a == nullrev:
1221 return True
1221 return True
1222 elif a == b:
1222 elif a == b:
1223 return True
1223 return True
1224 elif a > b:
1224 elif a > b:
1225 return False
1225 return False
1226 return a in self._commonancestorsheads(a, b)
1226 return bool(self.reachableroots(a, [b], [a], includepath=False))
1227
1228 def reachableroots(self, minroot, heads, roots, includepath=False):
1229 """return (heads(::<roots> and <roots>::<heads>))
1230
1231 If includepath is True, return (<roots>::<heads>)."""
1232 try:
1233 return self.index.reachableroots2(minroot, heads, roots,
1234 includepath)
1235 except AttributeError:
1236 return dagop._reachablerootspure(self.parentrevs,
1237 minroot, roots, heads, includepath)
1227
1238
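    # Why the reachableroots(a, [b], [a]) call above answers the ancestry
    # question: with roots=[a] and heads=[b] it computes the heads of
    # "::a and a::b", a set that is non-empty exactly when a is an ancestor
    # of b. Because a <= b is already established, a is also a safe value
    # for minroot. Hypothetical usage, with rl a revlog instance:
    #
    #   rl.isancestorrev(2, 10)  # True iff rev 2 is an ancestor of rev 10
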
    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if "%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.indexfile, _('ambiguous identifier'))
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[:l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [n for n in nl if hex(n).startswith(id) and
                      self.hasnode(n)]
                if nullhex.startswith(id):
                    nl.append(nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.indexfile, _('ambiguous identifier'))
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.indexfile, _('no match found'))

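    # Resolution order in lookup() above: exact forms first (_match handles
    # integer revs, 20-byte binary nodes, "str(rev)" strings and full 40-char
    # hex), then unique-prefix matches via _partialmatch. Hypothetical usage,
    # with rl a revlog instance and the prefix assumed unique:
    #
    #   node = rl.lookup('1f0e')  # unique hex prefix -> binary nodeid
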
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""
        def isvalid(prefix):
            try:
                node = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if node is None:
                raise error.LookupError(node, self.indexfile, _('no node'))
            return True

        def maybewdir(prefix):
            return all(c == 'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != wdirid:
                    raise error.LookupError(node, self.indexfile, _('no node'))
            except AttributeError:
                # Fall through to pure code
                pass

        if node == wdirid:
            for length in range(minlength, 41):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, 41):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                      - realoffset)
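        # Worked example, assuming cachesize = 256: offset=300, length=100
        # gives realoffset = 256 (rounded down to a cache boundary) and
        # reallength = ((400 + 256) & ~255) - 256 = 256, so bytes [256, 512)
        # are read: the requested [300, 400) plus cache-aligned padding.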
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _('partial read of revlog %s; expected %d bytes from '
                      'offset %d, got %d') %
                    (self.indexfile if self._inline else self.datafile,
                     length, realoffset, len(d) - startoffset))

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _('partial read of revlog %s; expected %d bytes from offset '
                  '%d, got %d') %
                (self.indexfile if self._inline else self.datafile,
                 length, offset, len(d)))

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
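            # In an inline revlog, index entries and data chunks are
            # interleaved in one file: every revision's data is preceded by
            # its fixed-size index entry, so rev r's data sits after r + 1
            # entries. That is the shift applied to both offsets above.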
        length = end - start

        return start, self._getsegment(start, length, df=df)

    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self._io.size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(self, revs,
                                                targetsize=targetsize)

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                ladd(decomp(buffer(data, chunkstart - offset, chunklength)))

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, 'issnapshot'):
            # assign the method directly to cache both the attribute test
            # and the lookup on later calls
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
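        # A delta against the revision's own parent is an ordinary delta,
        # not a snapshot. Any other base makes this rev a snapshot only if
        # the base itself is (recursively) a snapshot.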
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError('revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.revision(rev1, raw=True),
                              self.revision(rev2, raw=True))

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        cachedrev = None
        flags = None
        rawtext = None
        if node == nullid:
            return ""
        if self._revisioncache:
            if self._revisioncache[0] == node:
                # _cache only stores rawtext
                if raw:
                    return self._revisioncache[2]
                # duplicated, but good for perf
                if rev is None:
                    rev = self.rev(node)
                if flags is None:
                    flags = self.flags(rev)
                # no extra flags set, no flag processor runs, text = rawtext
                if flags == REVIDX_DEFAULT_FLAGS:
                    return self._revisioncache[2]
                # rawtext is reusable. need to run flag processor
                rawtext = self._revisioncache[2]

            cachedrev = self._revisioncache[1]

        # look up what we need to read
        if rawtext is None:
            if rev is None:
                rev = self.rev(node)

            chain, stopped = self._deltachain(rev, stoprev=cachedrev)
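            # stoprev lets _deltachain cut the walk short at the cached
            # revision: when "stopped" is true, the chain begins just past
            # cachedrev and the cached rawtext below serves as the base onto
            # which the remaining deltas are patched.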
            if stopped:
                rawtext = self._revisioncache[2]

            # drop cache to save memory
            self._revisioncache = None

            targetsize = None
            rawsize = self.index[rev][2]
            if 0 <= rawsize:
                targetsize = 4 * rawsize

            bins = self._chunks(chain, df=_df, targetsize=targetsize)
            if rawtext is None:
                rawtext = bytes(bins[0])
                bins = bins[1:]

            rawtext = mdiff.patches(rawtext, bins)
            self._revisioncache = (node, rev, rawtext)

        if flags is None:
            if rev is None:
                rev = self.rev(node)
            flags = self.flags(rev)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def _processflags(self, text, flags, operation, raw=False):
        """Inspect revision data flags and apply transforms defined by
        registered flag processors.

        ``text`` - the revision data to process
        ``flags`` - the revision flags
        ``operation`` - the operation being performed (read or write)
        ``raw`` - an optional argument describing if the raw transform should be
        applied.

        This method processes the flags in the order (or reverse order if
        ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
        flag processors registered for present flags. The order of flags defined
        in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.

        Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
        processed text and ``validatehash`` is a bool indicating whether the
        returned text should be checked for hash integrity.

        Note: If the ``raw`` argument is set, it has precedence over the
        operation and will only update the value of ``validatehash``.
        """
        # fast path: no flag processors will run
        if flags == 0:
            return text, True
        if operation not in ('read', 'write'):
            raise error.ProgrammingError(_("invalid '%s' operation") %
                                         operation)
        # Check all flags are known.
        if flags & ~REVIDX_KNOWN_FLAGS:
            raise error.RevlogError(_("incompatible revision flag '%#x'") %
                                    (flags & ~REVIDX_KNOWN_FLAGS))
        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)
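        # Writes walk the flag order backwards so that reads, which walk it
        # forwards, apply the inverse transforms in the opposite order; this
        # is what lets stacked non-commutative transforms round-trip.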

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply the
            # related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in self._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise error.RevlogError(message)

                processor = self._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                    validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(_("integrity check failed on %s:%s")
                    % (self.indexfile, pycompat.bytestr(revornode)))
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.indexfile, node, text)
            raise

    def _enforceinlinesize(self, tr, fp=None):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        if (not self._inline or
            (self.start(tiprev) + self.length(tiprev)) < _maxinline):
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise error.RevlogError(_("%s not found in the transaction")
                                    % self.indexfile)

        trindex = trinfo[2]
        if trindex is not None:
            dataoff = self.start(trindex)
        else:
            # revlog was stripped at start of transaction, use all leftover data
            trindex = len(self) - 1
            dataoff = self.end(tiprev)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

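        # First pass: stream every revision's raw chunk out of the inline
        # file into a new standalone data file; the index is rewritten
        # without the inline flag afterwards.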
        with self._indexfp('r') as ifh, self._datafp('w') as dfh:
            for r in self:
                dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])

        with self._indexfp('w') as fp:
            self.version &= ~FLAG_INLINE_DATA
            self._inline = False
            io = self._io
            for i in self:
                e = io.packentry(self.index[i], self.node, self.version, i)
                fp.write(e)

            # the temp file replaces the real index when we exit the context
            # manager

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
                    node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
            computed by default as hash(text, p1, p2), however subclasses might
            use a different hashing method (and override checkhash() in such
            case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
            multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(_("attempted to add linkrev -1 to %s")
                                    % self.indexfile)

        if flags:
            node = node or self.hash(text, p1, p2)
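            # Compute the node from the original text before any flag
            # processor rewrites it: the fallback hash further down runs on
            # the transformed rawtext and would produce a different id.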

        rawtext, validatehash = self._processflags(text, flags, 'write')

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
                % (self.indexfile, len(rawtext)))

        node = node or self.hash(rawtext, p1, p2)
        if node in self.nodemap:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
                                   flags, cachedelta=cachedelta,
                                   deltacomputer=deltacomputer)

    def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
                       cachedelta=None, deltacomputer=None):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        dfh = None
        if not self._inline:
            dfh = self._datafp("a+")
        ifh = self._indexfp("a+")
        try:
            return self._addrevision(node, rawtext, transaction, link, p1, p2,
                                     flags, cachedelta, ifh, dfh,
                                     deltacomputer=deltacomputer)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return '', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return '', compressed

        if data[0:1] == '\0':
            return '', data
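        # Data that already begins with a NUL byte needs no marker:
        # decompress() treats a leading '\0' as "stored literally". Anything
        # else gets an explicit 'u' prefix so its first byte cannot be
        # mistaken for a compression header.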
        return 'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == 'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        stringutil.forcebytestr(e))
        # '\0' is more common than 'u' so it goes first.
        elif t == '\0':
            return data
        elif t == 'u':
            return util.buffer(data, 1)

        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(_('unknown compression type %r') % t)

        return compressor.decompress(data)

    def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
                     cachedelta, ifh, dfh, alwayscache=False,
                     deltacomputer=None):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == nullid:
            raise error.RevlogError(_("%s: attempt to add null revision") %
                                    self.indexfile)
        if node == wdirid or node in wdirfilenodeids:
            raise error.RevlogError(_("%s: attempt to add wdir revision") %
                                    self.indexfile)

        if self._inline:
            fh = ifh
        else:
            fh = dfh

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1
        offset = self.end(prev)
        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
             deltainfo.base, link, p1r, p2r, node)
2052 self.index.append(e)
2063 self.index.append(e)
2053 self.nodemap[node] = curr
2064 self.nodemap[node] = curr
2054
2065
2055 # Reset the pure node cache start lookup offset to account for new
2066 # Reset the pure node cache start lookup offset to account for new
2056 # revision.
2067 # revision.
2057 if self._nodepos is not None:
2068 if self._nodepos is not None:
2058 self._nodepos = curr
2069 self._nodepos = curr
2059
2070
2060 entry = self._io.packentry(e, self.node, self.version, curr)
2071 entry = self._io.packentry(e, self.node, self.version, curr)
2061 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2072 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
2062 link, offset)
2073 link, offset)
2063
2074
2064 rawtext = btext[0]
2075 rawtext = btext[0]
2065
2076
2066 if alwayscache and rawtext is None:
2077 if alwayscache and rawtext is None:
2067 rawtext = deltacomputer.buildtext(revinfo, fh)
2078 rawtext = deltacomputer.buildtext(revinfo, fh)
2068
2079
2069 if type(rawtext) == bytes: # only accept immutable objects
2080 if type(rawtext) == bytes: # only accept immutable objects
2070 self._revisioncache = (node, curr, rawtext)
2081 self._revisioncache = (node, curr, rawtext)
2071 self._chainbasecache[curr] = deltainfo.chainbase
2082 self._chainbasecache[curr] = deltainfo.chainbase
2072 return node
2083 return node
2073
2084
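    # For reference (an illustrative sketch mirroring the tuple built in
    # _addrevision above): each index entry ``e`` packs eight fields:
    #
    #   (offset_type(offset, flags),  # data offset combined with flag bits
    #    deltainfo.deltalen,          # length of the stored (delta) chunk
    #    textlen,                     # length of the full revision text
    #    deltainfo.base,              # revision this delta is based on
    #    link,                        # linkrev into the changelog
    #    p1r, p2r,                    # parent revision numbers
    #    node)                        # node id of the revision
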
    def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        ifh.seek(0, os.SEEK_END)
        if dfh:
            dfh.seek(0, os.SEEK_END)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self._enforceinlinesize(transaction, ifh)

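    # Minimal illustration of the a+ mode workaround above (a sketch under
    # assumed names, not upstream code): reposition to EOF before every
    # append when the same handle is also used for reads.
    #
    #   fh = open(somepath, 'a+b')
    #   fh.read(16)                # a read may leave the position mid-file
    #   fh.seek(0, os.SEEK_END)    # reposition before appending
    #   fh.write(entrybytes)       # now guaranteed to land at the end
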
    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the node that was added.
        """

        if self._writinghandles:
            raise error.ProgrammingError('cannot nest addgroup() calls')

        nodes = []

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self._indexfp("a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self._datafp("a+")
        def flush():
            if dfh:
                dfh.flush()
            ifh.flush()

        self._writinghandles = (ifh, dfh)

        try:
            deltacomputer = deltautil.deltacomputer(self)
            # loop through our set of deltas
            for data in deltas:
                node, p1, p2, linknode, deltabase, delta, flags = data
                link = linkmapper(linknode)
                flags = flags or REVIDX_DEFAULT_FLAGS

                nodes.append(node)

                if node in self.nodemap:
                    self._nodeduplicatecallback(transaction, node)
                    # this can happen if two branches make the same change
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise error.LookupError(p, self.indexfile,
                                                _('unknown parent'))

                if deltabase not in self.nodemap:
                    raise error.LookupError(deltabase, self.indexfile,
                                            _('unknown delta base'))

                baserev = self.rev(deltabase)

                if baserev != nullrev and self.iscensored(baserev):
                    # if base is censored, delta must be full replacement in a
                    # single patch operation
                    hlen = struct.calcsize(">lll")
                    oldlen = self.rawsize(baserev)
                    newlen = len(delta) - hlen
                    if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
                        raise error.CensoredBaseError(self.indexfile,
                                                      self.node(baserev))

                if not flags and self._peek_iscensored(baserev, delta, flush):
                    flags |= REVIDX_ISCENSORED

                # We assume consumers of addrevisioncb will want to retrieve
                # the added revision, which will require a call to
                # revision(). revision() will fast path if there is a cache
                # hit. So, we tell _addrevision() to always cache in this case.
                # We're only using addgroup() in the context of changegroup
                # generation so the revision data can always be handled as raw
                # by the flagprocessor.
                self._addrevision(node, None, transaction, link,
                                  p1, p2, flags, (baserev, delta),
                                  ifh, dfh,
                                  alwayscache=bool(addrevisioncb),
                                  deltacomputer=deltacomputer)

                if addrevisioncb:
                    addrevisioncb(self, node)

                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    ifh.close()
                    dfh = self._datafp("a+")
                    ifh = self._indexfp("a+")
                    self._writinghandles = (ifh, dfh)
        finally:
            self._writinghandles = None

        if dfh:
            dfh.close()
        ifh.close()

        return nodes

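    # Shape of one ``deltas`` element consumed by addgroup() above, exactly
    # as unpacked at the top of its loop (shown as an illustrative sketch):
    #
    #   (node,       # node id of the revision being added
    #    p1, p2,     # parent node ids
    #    linknode,   # changelog node this revision links to
    #    deltabase,  # node the delta was computed against
    #    delta,      # the delta payload itself
    #    flags)      # revision flags; falsy means REVIDX_DEFAULT_FLAGS
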
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta, flush):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(minlink, len(self) - 1,
                                            self.headrevs(),
                                            self.linkrev, self.parentrevs)

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = {}
        self._chunkclear()
        for x in pycompat.xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
        self._nodepos = None

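    # Illustrative pairing of the two methods above (a sketch; the real
    # callers live in the strip/repair machinery):
    #
    #   rev, broken = rl.getstrippoint(minlink)
    #   if rev < len(rl):            # something actually needs stripping
    #       rl.strip(minlink, tr)    # truncates index/data through `tr`
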
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, 2)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            f.close()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

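    # Example health check built on checksize() (an illustrative sketch):
    #
    #   dd, di = rl.checksize()
    #   if (dd, di) != (0, 0):
    #       print('%d stray data bytes, %d stray index bytes' % (dd, di))
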
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        if nodesorder not in ('nodes', 'storage', 'linear', None):
            raise error.ProgrammingError('unhandled value for nodesorder: %s' %
                                         nodesorder)

        if nodesorder is None and not self._generaldelta:
            nodesorder = 'storage'

        if (not self._storedeltachains and
                deltamode != repository.CG_DELTAMODE_PREV):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self, nodes, nodesorder, revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions)

    DELTAREUSEALWAYS = 'always'
    DELTAREUSESAMEREVS = 'samerevs'
    DELTAREUSENEVER = 'never'

    DELTAREUSEFULLADD = 'fulladd'

    DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}

    def clone(self, tr, destrevlog, addrevisioncb=None,
              deltareuse=DELTAREUSESAMEREVS, forcedeltabothparents=None):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. When None, the destination revlog's existing setting is
        kept.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)

        if len(destrevlog):
            raise ValueError(_('destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_('source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_('destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            deltacomputer = deltautil.deltacomputer(destrevlog)
            index = self.index
            for rev in self:
                entry = index[rev]

                # Some classes override linkrev to take filtered revs into
                # account. Use raw entry from index.
                flags = entry[0] & 0xffff
                linkrev = entry[4]
                p1 = index[entry[5]][7]
                p2 = index[entry[6]][7]
                node = entry[7]

                # (Possibly) reuse the delta from the revlog if allowed and
                # the revlog chunk is a delta.
                cachedelta = None
                rawtext = None
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                if not cachedelta:
                    rawtext = self.revision(rev, raw=True)

                if deltareuse == self.DELTAREUSEFULLADD:
                    destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
                                           cachedelta=cachedelta,
                                           node=node, flags=flags,
                                           deltacomputer=deltacomputer)
                else:
                    ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
                                            checkambig=False)
                    dfh = None
                    if not destrevlog._inline:
                        dfh = destrevlog.opener(destrevlog.datafile, 'a+')
                    try:
                        destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
                                                p2, flags, cachedelta, ifh, dfh,
                                                deltacomputer=deltacomputer)
                    finally:
                        if dfh:
                            dfh.close()
                        ifh.close()

                if addrevisioncb:
                    addrevisioncb(self, rev, node)
        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

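    # Hypothetical use, e.g. when rewriting a revlog during a format upgrade
    # (a sketch; `tr` and `destrevlog` are assumed to be prepared by the
    # caller):
    #
    #   srcrevlog.clone(tr, destrevlog,
    #                   deltareuse=srcrevlog.DELTAREUSENEVER)
    #   # DELTAREUSENEVER forces every delta to be recomputed in the copy
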
    def censorrevision(self, tr, censornode, tombstone=b''):
        if (self.version & 0xFFFF) == REVLOGV0:
            raise error.RevlogError(_('cannot censor with version %d revlogs') %
                                    self.version)

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(_('censor tombstone must be no longer than '
                                'censored data'))

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.

        newindexfile = self.indexfile + b'.tmpcensored'
        newdatafile = self.datafile + b'.tmpcensored'

        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(self.opener, newindexfile, newdatafile,
                       censorable=True)
        newrl.version = self.version
        newrl._generaldelta = self._generaldelta
        newrl._io = self._io

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
                                     p1, p2, censornode, REVIDX_ISCENSORED)

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(_('censored revision stored as delta; '
                                        'cannot censor'),
                                      hint=_('censoring of revlogs is not '
                                             'fully implemented; please report '
                                             'this bug'))
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(_('cannot censor due to censored '
                                        'revision having delta stored'))
                rawtext = self._chunk(rev)
            else:
                rawtext = self.revision(rev, raw=True)

            newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
                                 self.flags(rev))

        tr.addbackup(self.indexfile, location='store')
        if not self._inline:
            tr.addbackup(self.datafile, location='store')

        self.opener.rename(newrl.indexfile, self.indexfile)
        if not self._inline:
            self.opener.rename(newrl.datafile, self.datafile)

        self.clearcaches()
        self._loadindex()

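    # Sketch of a censor invocation (assumed caller context; upstream this is
    # driven by the censor machinery inside a repository transaction `tr`):
    #
    #   rl.censorrevision(tr, badnode, tombstone=b'removed by administrator')
    #   # when `tr` closes, the rewritten revlog replaces the original files
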
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_('data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_('index contains %d extra bytes') % di)

        version = self.version & 0xFFFF

        # The verifier tells us what version revlog we should be.
        if version != state['expectedversion']:
            yield revlogproblem(
                warning=_("warning: '%s' uses revlog format %d; expected %d") %
                        (self.indexfile, version, state['expectedversion']))

        state['skipread'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "revision(rev, raw=True)". "text"
            # mentioned below is "revision(rev, raw=False)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see revlog.addflagprocessor).
            #
            #              | common | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1     | L1     | L1    | L1
            # size()       | L1     | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2     | L2     | L2    | L2
            # len(text)    | L2     | L2     | L2    | L3
            # len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get('skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                if skipflags:
                    state['skipread'].add(node)
                else:
                    # Side-effect: read content and verify hash.
                    self.revision(node)

                l1 = self.rawsize(rev)
                l2 = len(self.revision(node, raw=True))

                if l1 != l2:
                    yield revlogproblem(
                        error=_('unpacked size is %d, %d expected') % (l2, l1),
                        node=node)

            except error.CensoredNodeError:
                if state['erroroncensored']:
                    yield revlogproblem(error=_('censored file data'),
                                        node=node)
                state['skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_('unpacking %s: %s') % (short(node),
                                                   stringutil.forcebytestr(e)),
                    node=node)
                state['skipread'].add(node)

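    # Consuming the generator above (an illustrative sketch; the `state` keys
    # match the ones the method reads, and each yielded revlogproblem carries
    # `warning`, `error` and `node` attributes):
    #
    #   state = {'expectedversion': 1, 'erroroncensored': True}
    #   for problem in rl.verifyintegrity(state):
    #       print(problem.error or problem.warning)
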
    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        d = {}

        if exclusivefiles:
            d['exclusivefiles'] = [(self.opener, self.indexfile)]
            if not self._inline:
                d['exclusivefiles'].append((self.opener, self.datafile))

        if sharedfiles:
            d['sharedfiles'] = []

        if revisionscount:
            d['revisionscount'] = len(self)

        if trackedsize:
            d['trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d['storedsize'] = sum(self.opener.stat(path).st_size
                                  for path in self.files())

        return d
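
    # Example (an illustrative sketch): report how much revision data this
    # revlog tracks versus what its files occupy on disk.
    #
    #   info = rl.storageinfo(trackedsize=True, storedsize=True)
    #   print('tracked: %d bytes, stored: %d bytes'
    #         % (info['trackedsize'], info['storedsize']))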