copies: return None instead of ChangingFiles when relevant...
marmoute
r46246:1720d843 default draft
@@ -1,618 +1,618 @@
# changelog.py - changelog class for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

from .i18n import _
from .node import (
    bin,
    hex,
    nullid,
)
from .thirdparty import attr

from . import (
    encoding,
    error,
    metadata,
    pycompat,
    revlog,
)
from .utils import (
    dateutil,
    stringutil,
)
from .revlogutils import flagutil

_defaultextra = {b'branch': b'default'}


def _string_escape(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
    >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
    >>> s
    'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
    >>> res = _string_escape(s)
    >>> s == _string_unescape(res)
    True
    """
    # subset of the string_escape codec
    text = (
        text.replace(b'\\', b'\\\\')
        .replace(b'\n', b'\\n')
        .replace(b'\r', b'\\r')
    )
    return text.replace(b'\0', b'\\0')


def _string_unescape(text):
    if b'\\0' in text:
        # fix up \0 without getting into trouble with \\0
        text = text.replace(b'\\\\', b'\\\\\n')
        text = text.replace(b'\\0', b'\0')
        text = text.replace(b'\n', b'')
    return stringutil.unescapestr(text)


def decodeextra(text):
    """
    >>> from .pycompat import bytechr as chr
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
    >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
    ...                                 b'baz': chr(92) + chr(0) + b'2'})
    ...                    ).items())
    [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
    """
    extra = _defaultextra.copy()
    for l in text.split(b'\0'):
        if l:
            k, v = _string_unescape(l).split(b':', 1)
            extra[k] = v
    return extra


def encodeextra(d):
    # keys must be sorted to produce a deterministic changelog entry
    items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
    return b"\0".join(items)
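
# A minimal usage sketch (illustration only, not part of this file), in the
# style of the doctests above:
#
#   >>> raw = encodeextra({b'foo': b'bar', b'close': b'1'})
#   >>> raw
#   'close:1\x00foo:bar'
#   >>> decodeextra(raw) == {b'branch': b'default', b'close': b'1', b'foo': b'bar'}
#   True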


def stripdesc(desc):
    """strip trailing whitespace and leading and trailing empty lines"""
    return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')


class appender(object):
    '''the changelog index must be updated last on disk, so we use this class
    to delay writes to it'''

    def __init__(self, vfs, name, mode, buf):
        self.data = buf
        fp = vfs(name, mode)
        self.fp = fp
        self.offset = fp.tell()
        self.size = vfs.fstat(fp).st_size
        self._end = self.size

    def end(self):
        return self._end

    def tell(self):
        return self.offset

    def flush(self):
        pass

    @property
    def closed(self):
        return self.fp.closed

    def close(self):
        self.fp.close()

    def seek(self, offset, whence=0):
        '''virtual file offset spans real file and data'''
        if whence == 0:
            self.offset = offset
        elif whence == 1:
            self.offset += offset
        elif whence == 2:
            self.offset = self.end() + offset
        if self.offset < self.size:
            self.fp.seek(self.offset)

    def read(self, count=-1):
        '''only trick here is reads that span real file and data'''
        ret = b""
        if self.offset < self.size:
            s = self.fp.read(count)
            ret = s
            self.offset += len(s)
            if count > 0:
                count -= len(s)
        if count != 0:
            doff = self.offset - self.size
            self.data.insert(0, b"".join(self.data))
            del self.data[1:]
            s = self.data[0][doff : doff + count]
            self.offset += len(s)
            ret += s
        return ret

    def write(self, s):
        self.data.append(bytes(s))
        self.offset += len(s)
        self._end += len(s)

    def __enter__(self):
        self.fp.__enter__()
        return self

    def __exit__(self, *args):
        return self.fp.__exit__(*args)
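
# Sketch of how appender defers writes (illustration only; 'vfs' is assumed
# to be a Mercurial vfs and 'buf' a shared list, as wired up by _delayopener
# below):
#
#   buf = []
#   fp = appender(vfs, b'00changelog.i', b'a+b', buf)
#   fp.write(b'new index entry')  # kept in 'buf', not written to disk yet
#   fp.seek(0, 2)
#   assert fp.tell() == fp.end()  # the virtual offset spans file + buffer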


class _divertopener(object):
    def __init__(self, opener, target):
        self._opener = opener
        self._target = target

    def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
        if name != self._target:
            return self._opener(name, mode, **kwargs)
        return self._opener(name + b".a", mode, **kwargs)

    def __getattr__(self, attr):
        return getattr(self._opener, attr)


def _delayopener(opener, target, buf):
    """build an opener that stores chunks in 'buf' instead of 'target'"""

    def _delay(name, mode=b'r', checkambig=False, **kwargs):
        if name != target:
            return opener(name, mode, **kwargs)
        assert not kwargs
        return appender(opener, name, mode, buf)

    return _delay


@attr.s
class _changelogrevision(object):
    # Extensions might modify _defaultextra, so let the constructor below pass
    # it in
    extra = attr.ib()
    manifest = attr.ib(default=nullid)
    user = attr.ib(default=b'')
    date = attr.ib(default=(0, 0))
    files = attr.ib(default=attr.Factory(list))
    filesadded = attr.ib(default=None)
    filesremoved = attr.ib(default=None)
    p1copies = attr.ib(default=None)
    p2copies = attr.ib(default=None)
    description = attr.ib(default=b'')


class changelogrevision(object):
    """Holds results of a parsed changelog revision.

    Changelog revisions consist of multiple pieces of data, including
    the manifest node, user, and date. This object exposes a view into
    the parsed object.
    """

    __slots__ = (
        '_offsets',
        '_text',
        '_sidedata',
        '_cpsd',
        '_changes',
    )

    def __new__(cls, text, sidedata, cpsd):
        if not text:
            return _changelogrevision(extra=_defaultextra)

        self = super(changelogrevision, cls).__new__(cls)
        # We could return here and implement the following as an __init__.
        # But doing it here is equivalent and saves an extra function call.

        # format used:
        # nodeid\n        : manifest node in ascii
        # user\n          : user, no \n or \r allowed
        # time tz extra\n : date (time is int or float, timezone is int)
        #                 : extra is metadata, encoded and separated by '\0'
        #                 : older versions ignore it
        # files\n\n       : files modified by the cset, no \n or \r allowed
        # (.*)            : comment (free text, ideally utf-8)
        #
        # changelog v0 doesn't use extra

        nl1 = text.index(b'\n')
        nl2 = text.index(b'\n', nl1 + 1)
        nl3 = text.index(b'\n', nl2 + 1)

        # The list of files may be empty. Which means nl3 is the first of the
        # double newline that precedes the description.
        if text[nl3 + 1 : nl3 + 2] == b'\n':
            doublenl = nl3
        else:
            doublenl = text.index(b'\n\n', nl3 + 1)

        self._offsets = (nl1, nl2, nl3, doublenl)
        self._text = text
        self._sidedata = sidedata
        self._cpsd = cpsd
        self._changes = None

        return self

    @property
    def manifest(self):
        return bin(self._text[0 : self._offsets[0]])

    @property
    def user(self):
        off = self._offsets
        return encoding.tolocal(self._text[off[0] + 1 : off[1]])

    @property
    def _rawdate(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        return dateextra.split(b' ', 2)[0:2]

    @property
    def _rawextra(self):
        off = self._offsets
        dateextra = self._text[off[1] + 1 : off[2]]
        fields = dateextra.split(b' ', 2)
        if len(fields) != 3:
            return None

        return fields[2]

    @property
    def date(self):
        raw = self._rawdate
        time = float(raw[0])
        # Various tools did silly things with the timezone.
        try:
            timezone = int(raw[1])
        except ValueError:
            timezone = 0

        return time, timezone

    @property
    def extra(self):
        raw = self._rawextra
        if raw is None:
            return _defaultextra

        return decodeextra(raw)

    @property
    def changes(self):
        if self._changes is not None:
            return self._changes
        if self._cpsd:
            changes = metadata.decode_files_sidedata(self._sidedata)
        else:
            changes = metadata.ChangingFiles(
                touched=self.files or (),
                added=self.filesadded or (),
                removed=self.filesremoved or (),
                p1_copies=self.p1copies or {},
                p2_copies=self.p2copies or {},
            )
        self._changes = changes
        return changes

    @property
    def files(self):
        if self._cpsd:
            return sorted(self.changes.touched)
        off = self._offsets
        if off[2] == off[3]:
            return []

        return self._text[off[2] + 1 : off[3]].split(b'\n')

    @property
    def filesadded(self):
        if self._cpsd:
            return self.changes.added
        else:
            rawindices = self.extra.get(b'filesadded')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def filesremoved(self):
        if self._cpsd:
            return self.changes.removed
        else:
            rawindices = self.extra.get(b'filesremoved')
            if rawindices is None:
                return None
            return metadata.decodefileindices(self.files, rawindices)

    @property
    def p1copies(self):
        if self._cpsd:
            return self.changes.copied_from_p1
        else:
            rawcopies = self.extra.get(b'p1copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def p2copies(self):
        if self._cpsd:
            return self.changes.copied_from_p2
        else:
            rawcopies = self.extra.get(b'p2copies')
            if rawcopies is None:
                return None
            return metadata.decodecopies(self.files, rawcopies)

    @property
    def description(self):
        return encoding.tolocal(self._text[self._offsets[3] + 2 :])
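
# Illustration (not part of the changeset): parsing a minimal raw entry laid
# out as documented in __new__ above; the manifest hex is made up.
#
#   text = (
#       b'0123456789abcdef0123456789abcdef01234567\n'  # manifest node
#       b'Example User <user@example.com>\n'           # user
#       b'1600000000 0\n'                              # time tz (no extra)
#       b'a.txt\nb.txt\n'                              # touched files
#       b'\n'
#       b'the description'
#   )
#   rev = changelogrevision(text, sidedata={}, cpsd=False)
#   assert rev.files == [b'a.txt', b'b.txt']
#   assert rev.extra == {b'branch': b'default'}
#   assert rev.description == b'the description'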


class changelog(revlog.revlog):
    def __init__(self, opener, trypending=False):
        """Load a changelog revlog using an opener.

        If ``trypending`` is true, we attempt to load the index from a
        ``00changelog.i.a`` file instead of the default ``00changelog.i``.
        The ``00changelog.i.a`` file contains index (and possibly inline
        revision) data for a transaction that hasn't been finalized yet.
        It exists in a separate file to facilitate readers (such as
        hook processes) accessing data before a transaction is finalized.
        """
        if trypending and opener.exists(b'00changelog.i.a'):
            indexfile = b'00changelog.i.a'
        else:
            indexfile = b'00changelog.i'

        datafile = b'00changelog.d'
        revlog.revlog.__init__(
            self,
            opener,
            indexfile,
            datafile=datafile,
            checkambig=True,
            mmaplargeindex=True,
            persistentnodemap=opener.options.get(b'persistent-nodemap', False),
        )

        if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
            # changelogs don't benefit from generaldelta.

            self.version &= ~revlog.FLAG_GENERALDELTA
            self._generaldelta = False

        # Delta chains for changelogs tend to be very small because entries
        # tend to be small and don't delta well with each other. So disable
        # delta chains.
        self._storedeltachains = False

        self._realopener = opener
        self._delayed = False
        self._delaybuf = None
        self._divert = False
        self._filteredrevs = frozenset()
        self._filteredrevs_hashcache = {}
        self._copiesstorage = opener.options.get(b'copies-storage')

    @property
    def filteredrevs(self):
        return self._filteredrevs

    @filteredrevs.setter
    def filteredrevs(self, val):
        # Ensure all updates go through this function
        assert isinstance(val, frozenset)
        self._filteredrevs = val
        self._filteredrevs_hashcache = {}

    def delayupdate(self, tr):
        """delay visibility of index updates to other readers"""

        if not self._delayed:
            if len(self) == 0:
                self._divert = True
                if self._realopener.exists(self.indexfile + b'.a'):
                    self._realopener.unlink(self.indexfile + b'.a')
                self.opener = _divertopener(self._realopener, self.indexfile)
            else:
                self._delaybuf = []
                self.opener = _delayopener(
                    self._realopener, self.indexfile, self._delaybuf
                )
        self._delayed = True
        tr.addpending(b'cl-%i' % id(self), self._writepending)
        tr.addfinalize(b'cl-%i' % id(self), self._finalize)

    def _finalize(self, tr):
        """finalize index updates"""
        self._delayed = False
        self.opener = self._realopener
        # move redirected index data back into place
        if self._divert:
            assert not self._delaybuf
            tmpname = self.indexfile + b".a"
            nfile = self.opener.open(tmpname)
            nfile.close()
            self.opener.rename(tmpname, self.indexfile, checkambig=True)
        elif self._delaybuf:
            fp = self.opener(self.indexfile, b'a', checkambig=True)
            fp.write(b"".join(self._delaybuf))
            fp.close()
            self._delaybuf = None
        self._divert = False
        # split when we're done
        self._enforceinlinesize(tr)

    def _writepending(self, tr):
        """create a file containing the unfinalized state for
        pretxnchangegroup"""
        if self._delaybuf:
            # make a temporary copy of the index
            fp1 = self._realopener(self.indexfile)
            pendingfilename = self.indexfile + b".a"
            # register as a temp file to ensure cleanup on failure
            tr.registertmp(pendingfilename)
            # write existing data
            fp2 = self._realopener(pendingfilename, b"w")
            fp2.write(fp1.read())
            # add pending data
            fp2.write(b"".join(self._delaybuf))
            fp2.close()
            # switch modes so finalize can simply rename
            self._delaybuf = None
            self._divert = True
            self.opener = _divertopener(self._realopener, self.indexfile)

        if self._divert:
            return True

        return False

    def _enforceinlinesize(self, tr, fp=None):
        if not self._delayed:
            revlog.revlog._enforceinlinesize(self, tr, fp)

    def read(self, node):
        """Obtain data from a parsed changelog revision.

        Returns a 6-tuple of:

           - manifest node in binary
           - author/user as a localstr
           - date as a 2-tuple of (time, timezone)
           - list of files
           - commit message as a localstr
           - dict of extra metadata

        Unless you need to access all fields, consider calling
        ``changelogrevision`` instead, as it is faster for partial object
        access.
        """
        d, s = self._revisiondata(node)
        c = changelogrevision(
            d, s, self._copiesstorage == b'changeset-sidedata'
        )
        return (c.manifest, c.user, c.date, c.files, c.description, c.extra)

    def changelogrevision(self, nodeorrev):
        """Obtain a ``changelogrevision`` for a node or revision."""
        text, sidedata = self._revisiondata(nodeorrev)
        return changelogrevision(
            text, sidedata, self._copiesstorage == b'changeset-sidedata'
        )

    def readfiles(self, node):
        """
        short version of read that only returns the files modified by the cset
        """
        text = self.revision(node)
        if not text:
            return []
        last = text.index(b"\n\n")
        l = text[:last].split(b'\n')
        return l[3:]

    def add(
        self,
        manifest,
        files,
        desc,
        transaction,
        p1,
        p2,
        user,
        date=None,
        extra=None,
    ):
        # Convert to UTF-8 encoded bytestrings as the very first
        # thing: calling any method on a localstr object will turn it
        # into a str object and the cached UTF-8 string is thus lost.
        user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)

        user = user.strip()
        # An empty username or a username with a "\n" will make the
        # revision text contain two "\n\n" sequences -> corrupt
        # repository since read cannot unpack the revision.
        if not user:
            raise error.StorageError(_(b"empty username"))
        if b"\n" in user:
            raise error.StorageError(
                _(b"username %r contains a newline") % pycompat.bytestr(user)
            )

        desc = stripdesc(desc)

        if date:
            parseddate = b"%d %d" % dateutil.parsedate(date)
        else:
            parseddate = b"%d %d" % dateutil.makedate()
        if extra:
            branch = extra.get(b"branch")
            if branch in (b"default", b""):
                del extra[b"branch"]
            elif branch in (b".", b"null", b"tip"):
                raise error.StorageError(
                    _(b'the name \'%s\' is reserved') % branch
                )
        sortedfiles = sorted(files.touched)
        flags = 0
        sidedata = None
        if self._copiesstorage == b'changeset-sidedata':
            if (
                files.removed
                or files.merged
                or files.salvaged
                or files.copied_from_p1
                or files.copied_from_p2
            ):
                flags |= flagutil.REVIDX_HASCOPIESINFO
            sidedata = metadata.encode_files_sidedata(files)

        if extra:
            extra = encodeextra(extra)
            parseddate = b"%s %s" % (parseddate, extra)
        l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
        text = b"\n".join(l)
        return self.addrevision(
-           text, transaction, len(self), p1, p2, sidedata=sidedata
+           text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
        )

    def branchinfo(self, rev):
        """return the branch name and open/close state of a revision

        This function exists because creating a changectx object
        just to access this is costly."""
        extra = self.read(rev)[5]
        return encoding.tolocal(extra.get(b"branch")), b'close' in extra

    def _nodeduplicatecallback(self, transaction, node):
        # keep track of revisions that got "re-added", e.g. unbundle of a
        # known rev.
        #
        # We track them in a list to preserve their order from the source bundle
        duplicates = transaction.changes.setdefault(b'revduplicates', [])
        duplicates.append(self.rev(node))
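
# End-to-end sketch (illustration only, with made-up names): how a caller
# hands a ChangingFiles object to add(); 'repo' is assumed to be a loaded
# localrepository and 'mnode' a valid manifest node.
#
#   from mercurial import metadata
#   from mercurial.node import nullid
#
#   files = metadata.ChangingFiles()
#   files.mark_added(b'new.txt')
#   with repo.lock(), repo.transaction(b'sketch') as tr:
#       n = repo.changelog.add(
#           mnode, files, b'add new.txt', tr,
#           repo.changelog.tip(), nullid,
#           b'Example User <user@example.com>',
#       )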
@@ -1,1089 +1,1109 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from . import (
    match as matchmod,
    node,
    pathutil,
    pycompat,
    util,
)


from .utils import stringutil

+from .revlogutils import flagutil
+

def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result
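
# Worked example for the case table in _filter() above (hypothetical paths):
#
#   copies in 'a' (src -> mid):  {b'y': b'x'}   # x was renamed to y
#   copies in 'b' (mid -> dst):  {b'z': b'y'}   # y was then renamed to z
#   _chain() yields {b'y': b'x', b'z': b'x'}; since b'y' no longer exists in
#   dst, _filter() drops it, leaving {b'z': b'x'}.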


def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource
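
# For reference (not part of the patch): changeset-centric copy reading is
# enabled either by the repo-level b'changeset-sidedata' filecopiesmode or by
# the configuration below.
#
#   [experimental]
#   copies.read-from = changeset-only   # or: compatibility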


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
+   flags = cl.flags
+
+   HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it makes sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parent.
    #
    # There are cases where only one parent of the merge will be walked. It
    # happens when the "destination" of the copy tracing is a descendant of a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendants of changesets common
    # between "source" and "destination".
    #
    # With the current implementation, if such changesets have copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        p1, p2 = parents(rev)
        value = None
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
-       value = (p1, p2, changelogrevision(rev).changes)
+       changes = None
+       if flags(rev) & HASCOPIESINFO:
+           changes = changelogrevision(rev).changes
+       value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo
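
# Sketch (illustration only): callers of revinfo() must now cope with
# changes=None, which is what revisions without the HASCOPIESINFO flag
# return; 'repo' and 'rev' are assumed.
#
#   revinfo = _revinfo_getter(repo)
#   p1, p2, changes = revinfo(rev)
#   copied = {} if changes is None else changes.copied_from_p1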


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
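
# Toy run of the bookkeeping above (hypothetical revision numbers): with
# missing revisions {2, 3, 4} and parent edges 1->2, 2->3, 2->4, we get
# children == {1: [2], 2: [3, 4]} and roots == {1}; 'revs' is walked in
# ascending order, so a parent is always combined before its children.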


def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of revs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in revs)
    revinfo(rev): a function that returns (p1, p2, changes)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, changes = revinfo(c)
+           childcopies = {}
            if r == p1:
                parent = 1
+               if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
+               if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
+           if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
350
362
351
363
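# Illustrative sketch (not part of the original code): the per-revision copies
# dicts built above map destination -> (introducing_rev, source), where a None
# source marks a copy that was invalidated by a later removal. The final loop
# simply drops the None entries:
#
#     >>> all_copies = {4: {b'b': (3, b'a'), b'c': (4, None)}}
#     >>> {d: s for d, (tt, s) in all_copies[4].items() if s is not None}
#     {b'b': b'a'}
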
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies mappings together, minor and major

    In case of conflict, the value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: a callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: the ChangingFiles information for the current revision, or
      None when that information is not available.
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value

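# Illustrative sketch (not part of the original code): the conflict rules
# above applied to plain dicts, with a toy `isancestor` and no ChangingFiles
# information (changes=None). The function mutates `minor` in place:
#
#     >>> minor = {b'f': (2, b'a')}
#     >>> major = {b'f': (3, b'b')}
#     >>> _merge_copies_dict(minor, major, lambda low, high: False, None)
#     >>> minor   # major's revision is not an ancestor of minor's, so it wins
#     {b'f': (3, b'b')}
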
def _revinfo_getter_extra(repo):
    """return a function that returns multiple pieces of data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo

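# Sketch of the returned callable's shape (hypothetical `repo` and revision
# number; not part of the original code):
#
#     >>> revinfo = _revinfo_getter_extra(repo)
#     >>> p1, p2, p1copies, p2copies, removed, ismerged = revinfo(5)
#     >>> ismerged(b'some/file')  # True iff the filelog entry has 2 parents
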
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies

def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value

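# Illustrative sketch (not part of the original code): with the "extra"
# variant, a merged destination always takes the value from "major", even
# when major's revision is an ancestor of minor's:
#
#     >>> minor = {b'f': (2, b'a')}
#     >>> major = {b'f': (3, b'b')}
#     >>> _merge_copies_dict_extra(
#     ...     minor, major, lambda low, high: True, lambda path: True
#     ... )
#     >>> minor
#     {b'f': (3, b'b')}
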
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies

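# Sketch usage (hypothetical revisions in a hypothetical `repo`; `a` must be
# an ancestor of `b`; not part of the original code):
#
#     >>> copies = _forwardcopies(repo[b'1'], repo[b'3'])
#     >>> copies.get(b'b.txt')  # -> b'a.txt' if a.txt was renamed in between
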
def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

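# Illustrative sketch (not part of the original code): the loop above
# reverses a {dst: src} mapping into {src: dst}; with a 1:n rename the
# lexicographically last destination wins because of the sorted() walk
# (the `match` and copy filtering are omitted here):
#
#     >>> f = {b'b': b'a', b'c': b'a'}
#     >>> r = {}
#     >>> for k, v in sorted(f.items()):
#     ...     r[v] = k
#     >>> r
#     {b'a': b'c'}
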
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies

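# Sketch usage (hypothetical repo where a.txt was renamed to b.txt between
# revision 1 and revision 3; not part of the original code):
#
#     >>> pathcopies(repo[b'1'], repo[b'3'])
#     {b'b.txt': b'a.txt'}
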
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies (one per side).

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so copytracing stays enabled in such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

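# Sketch of the `experimental.copytrace` values this function dispatches on
# (per the branches above; not an exhaustive config reference):
#
#     [experimental]
#     copytrace = heuristics  # filename heuristics, _heuristicscopytracing()
#     copytrace = off         # disable copytracing (parsed as a false value)
#     # any other value falls through to full copytracing
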
def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public; if so,
    the full copytrace algorithm can be used for increased capabilities, since
    it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if there are more changesets
    than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

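# Sketch of the limit consulted above (the default value is defined in the
# config tables elsewhere in the codebase):
#
#     [experimental]
#     copytrace.sourcecommitlimit = 100
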
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src

class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
        )

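# Illustrative sketch (not part of the original code): unspecified fields
# default to fresh empty dicts:
#
#     >>> bc = branch_copies(copy={b'b': b'a'})
#     >>> bc.copy
#     {b'b': b'a'}
#     >>> (bc.renamedelete, bc.dirmove, bc.movewithdir)
#     ({}, {}, {})
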
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a': 'c' is then recorded as a copy on
                # both sides and the rename is not reported as divergent.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge

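# Illustrative sketch (not part of the original code): how the inverse
# mapping above groups several destinations under one source:
#
#     >>> copies1 = {b'b': b'a', b'c': b'a'}
#     >>> inv = {}
#     >>> for dst, src in sorted(copies1.items()):
#     ...     inv.setdefault(src, []).append(dst)
#     >>> inv
#     {b'a': [b'b', b'c']}
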
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
      merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir

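# Illustrative sketch (not part of the original code): if every copy out of a
# directory lands in the same new directory, the directory itself is
# considered moved, and files later added under the old name follow it:
#
#     fullcopy = {b'dir2/f': b'dir1/f', b'dir2/g': b'dir1/g'}
#     # -> dirmove == {b'dir1/': b'dir2/'}
#     # -> a file b'dir1/h' added on the other side maps to b'dir2/h'
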
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is commits from base up to c2, not including base.

    If a merge is involved, it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates, which can slow down the
            # heuristics; this config value limits the number of candidate
            # moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}

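# Illustrative sketch (not part of the original code): how move candidates
# are collected by shared basename (the same applies to shared directory):
#
#     >>> import collections, os
#     >>> basenametofilename = collections.defaultdict(list)
#     >>> for f in [b'new/a.txt', b'other/b.txt']:
#     ...     basenametofilename[os.path.basename(f)].append(f)
#     >>> basenametofilename[b'a.txt']
#     [b'new/a.txt']
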
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False

def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
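
# Sketch usage while grafting (hypothetical revision names; not part of the
# original code): reproduce in the working copy the copies made between a
# graft source and its parent.
#
#     >>> ctx = repo[b'source-rev']
#     >>> graftcopies(repo[None], ctx, ctx.p1())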