# Mercurial changeset r46725:2f357d05 (default branch), by Kyle Lippincott:
# "copies: make calculating lazy for dir move detection's 'addedfiles'"
@@ -1,1178 +1,1194
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 2
41 # in the following table (not including trivial cases). For example, case 2
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 # remove copies from files that didn't exist
59 # remove copies from files that didn't exist
60 if v not in src:
60 if v not in src:
61 del t[k]
61 del t[k]
62 # remove criss-crossed copies
62 # remove criss-crossed copies
63 elif k in src and v in dst:
63 elif k in src and v in dst:
64 del t[k]
64 del t[k]
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 elif k not in dst:
66 elif k not in dst:
67 del t[k]
67 del t[k]
68
68
69
69
def _chain(prefix, suffix):
    """Chain two copy mappings: 'prefix' applied first, then 'suffix'."""
    chained = dict(prefix)
    for copy_dst, copy_src in pycompat.iteritems(suffix):
        # if the suffix's source was itself a copy target in 'prefix',
        # collapse the chain down to the original prefix source
        chained[copy_dst] = prefix.get(copy_src, copy_src)
    return chained
76
76
77
77
78 def _tracefile(fctx, am, basemf):
78 def _tracefile(fctx, am, basemf):
79 """return file context that is the ancestor of fctx present in ancestor
79 """return file context that is the ancestor of fctx present in ancestor
80 manifest am
80 manifest am
81
81
82 Note: we used to try and stop after a given limit, however checking if that
82 Note: we used to try and stop after a given limit, however checking if that
83 limit is reached turned out to be very expensive. we are better off
83 limit is reached turned out to be very expensive. we are better off
84 disabling that feature."""
84 disabling that feature."""
85
85
86 for f in fctx.ancestors():
86 for f in fctx.ancestors():
87 path = f.path()
87 path = f.path()
88 if am.get(path, None) == f.filenode():
88 if am.get(path, None) == f.filenode():
89 return path
89 return path
90 if basemf and basemf.get(path, None) == f.filenode():
90 if basemf and basemf.get(path, None) == f.filenode():
91 return path
91 return path
92
92
93
93
94 def _dirstatecopies(repo, match=None):
94 def _dirstatecopies(repo, match=None):
95 ds = repo.dirstate
95 ds = repo.dirstate
96 c = ds.copies().copy()
96 c = ds.copies().copy()
97 for k in list(c):
97 for k in list(c):
98 if ds[k] not in b'anm' or (match and not match(k)):
98 if ds[k] not in b'anm' or (match and not match(k)):
99 del c[k]
99 del c[k]
100 return c
100 return c
101
101
102
102
103 def _computeforwardmissing(a, b, match=None):
103 def _computeforwardmissing(a, b, match=None):
104 """Computes which files are in b but not a.
104 """Computes which files are in b but not a.
105 This is its own function so extensions can easily wrap this call to see what
105 This is its own function so extensions can easily wrap this call to see what
106 files _forwardcopies is about to process.
106 files _forwardcopies is about to process.
107 """
107 """
108 ma = a.manifest()
108 ma = a.manifest()
109 mb = b.manifest()
109 mb = b.manifest()
110 return mb.filesnotin(ma, match=match)
110 return mb.filesnotin(ma, match=match)
111
111
112
112
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
120
120
121
121
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies_found = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share one ancestry context across all traced files
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            copies_found[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies_found
179
179
180
180
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>"

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or raw sidedata in the rust path)
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it make sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parents.
    #
    # It exists cases were only one parent of the merge will be walked. It
    # happens when the "destination" the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current case implementation if such changesets have a copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matters.
    #
    # In addition, it would be possible to reach pathological case, were
    # many first parent are met before any second parent is reached. In
    # that case the cache could grow. If this even become an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        # rust path: hand back the raw sidedata blob

        def revinfo(rev):
            p1, p2 = parents(rev)
            cached = merge_caches.pop(rev, None)
            if cached is not None:
                return cached
            raw = None
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            value = (p1, p2, raw)
            if p1 != node.nullrev and p2 != node.nullrev:
                # XXX some case we over cache, IGNORE
                merge_caches[rev] = value
            return value

    else:
        # pure-python path: hand back a parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            cached = merge_caches.pop(rev, None)
            if cached is not None:
                return cached
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            value = (p1, p2, changes)
            if p1 != node.nullrev and p2 != node.nullrev:
                # XXX some case we over cache, IGNORE
                merge_caches[rev] = value
            return value

    return revinfo
259
259
260
260
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # revision numbers only grow toward descendants, so a higher
        # revision can never be an ancestor of a lower one
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
277
277
278
278
def _changesetforwardcopies(a, b, match):
    """Return the {dst: src} copy mapping from changeset ``a`` to ``b``,
    computed from changeset-stored copy information."""
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    missing = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    missing_set = set(missing)
    roots = set()
    for rev in missing:
        for parent in cl.parentrevs(rev):
            if parent == node.nullrev:
                continue
            children.setdefault(parent, []).append(rev)
            if parent not in missing_set:
                # parent is on the "common" side: a starting point
                roots.add(parent)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions lying on a path between a root and b
    reachable = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    walk = set(reachable)
    walk &= missing_set
    walk.update(roots)
    walk.remove(b.rev())
    revs = sorted(walk)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
326
326
327
327
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision,
        where changes is a ChangingFiles object or None
        (the original docstring described the `_extra` variant's contract)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        # (the enumerate() index previously bound here was never used)
        for c in children[r]:
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None and changes.removed:
                if newcopies is copies:
                    newcopies = copies.copy()
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # record the removal; filtered out of the final result
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    if newcopies is copies:
                        newcopies = copies.copy()
                    minor, major = othercopies, newcopies
                else:
                    # we do not know if the other dict is a copy or not, so we
                    # need to blindly copy it. Future change should make this
                    # unnecessary.
                    minor, major = newcopies, othercopies.copy()
                copies = _merge_copies_dict(minor, major, isancestor, changes)
                all_copies[c] = copies

    # (c, None) entries mark removals; only keep real copy sources
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
424
424
425
425
426 def _merge_copies_dict(minor, major, isancestor, changes):
426 def _merge_copies_dict(minor, major, isancestor, changes):
427 """merge two copies-mapping together, minor and major
427 """merge two copies-mapping together, minor and major
428
428
429 In case of conflict, value from "major" will be picked.
429 In case of conflict, value from "major" will be picked.
430
430
431 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
431 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
432 ancestors of `high_rev`,
432 ancestors of `high_rev`,
433
433
434 - `ismerged(path)`: callable return True if `path` have been merged in the
434 - `ismerged(path)`: callable return True if `path` have been merged in the
435 current revision,
435 current revision,
436
436
437 return the resulting dict (in practice, the "minor" object, updated)
437 return the resulting dict (in practice, the "minor" object, updated)
438 """
438 """
439 for dest, value in major.items():
439 for dest, value in major.items():
440 other = minor.get(dest)
440 other = minor.get(dest)
441 if other is None:
441 if other is None:
442 minor[dest] = value
442 minor[dest] = value
443 else:
443 else:
444 new_tt = value[0]
444 new_tt = value[0]
445 other_tt = other[0]
445 other_tt = other[0]
446 if value[1] == other[1]:
446 if value[1] == other[1]:
447 continue
447 continue
448 # content from "major" wins, unless it is older
448 # content from "major" wins, unless it is older
449 # than the branch point or there is a merge
449 # than the branch point or there is a merge
450 if new_tt == other_tt:
450 if new_tt == other_tt:
451 minor[dest] = value
451 minor[dest] = value
452 elif (
452 elif (
453 changes is not None
453 changes is not None
454 and value[1] is None
454 and value[1] is None
455 and dest in changes.salvaged
455 and dest in changes.salvaged
456 ):
456 ):
457 pass
457 pass
458 elif (
458 elif (
459 changes is not None
459 changes is not None
460 and other[1] is None
460 and other[1] is None
461 and dest in changes.salvaged
461 and dest in changes.salvaged
462 ):
462 ):
463 minor[dest] = value
463 minor[dest] = value
464 elif changes is not None and dest in changes.merged:
464 elif changes is not None and dest in changes.merged:
465 minor[dest] = value
465 minor[dest] = value
466 elif not isancestor(new_tt, other_tt):
466 elif not isancestor(new_tt, other_tt):
467 if value[1] is not None:
467 if value[1] is not None:
468 minor[dest] = value
468 minor[dest] = value
469 elif isancestor(other_tt, new_tt):
469 elif isancestor(other_tt, new_tt):
470 minor[dest] = value
470 minor[dest] = value
471 return minor
471 return minor
472
472
473
473
474 def _revinfo_getter_extra(repo):
474 def _revinfo_getter_extra(repo):
475 """return a function that return multiple data given a <rev>"i
475 """return a function that return multiple data given a <rev>"i
476
476
477 * p1: revision number of first parent
477 * p1: revision number of first parent
478 * p2: revision number of first parent
478 * p2: revision number of first parent
479 * p1copies: mapping of copies from p1
479 * p1copies: mapping of copies from p1
480 * p2copies: mapping of copies from p2
480 * p2copies: mapping of copies from p2
481 * removed: a list of removed files
481 * removed: a list of removed files
482 * ismerged: a callback to know if file was merged in that revision
482 * ismerged: a callback to know if file was merged in that revision
483 """
483 """
484 cl = repo.changelog
484 cl = repo.changelog
485 parents = cl.parentrevs
485 parents = cl.parentrevs
486
486
487 def get_ismerged(rev):
487 def get_ismerged(rev):
488 ctx = repo[rev]
488 ctx = repo[rev]
489
489
490 def ismerged(path):
490 def ismerged(path):
491 if path not in ctx.files():
491 if path not in ctx.files():
492 return False
492 return False
493 fctx = ctx[path]
493 fctx = ctx[path]
494 parents = fctx._filelog.parents(fctx._filenode)
494 parents = fctx._filelog.parents(fctx._filenode)
495 nb_parents = 0
495 nb_parents = 0
496 for n in parents:
496 for n in parents:
497 if n != node.nullid:
497 if n != node.nullid:
498 nb_parents += 1
498 nb_parents += 1
499 return nb_parents >= 2
499 return nb_parents >= 2
500
500
501 return ismerged
501 return ismerged
502
502
503 def revinfo(rev):
503 def revinfo(rev):
504 p1, p2 = parents(rev)
504 p1, p2 = parents(rev)
505 ctx = repo[rev]
505 ctx = repo[rev]
506 p1copies, p2copies = ctx._copies
506 p1copies, p2copies = ctx._copies
507 removed = ctx.filesremoved()
507 removed = ctx.filesremoved()
508 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
508 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
509
509
510 return revinfo
510 return revinfo
511
511
512
512
513 def _combine_changeset_copies_extra(
513 def _combine_changeset_copies_extra(
514 revs, children, targetrev, revinfo, match, isancestor
514 revs, children, targetrev, revinfo, match, isancestor
515 ):
515 ):
516 """version of `_combine_changeset_copies` that works with the Google
516 """version of `_combine_changeset_copies` that works with the Google
517 specific "extra" based storage for copy information"""
517 specific "extra" based storage for copy information"""
518 all_copies = {}
518 all_copies = {}
519 alwaysmatch = match.always()
519 alwaysmatch = match.always()
520 for r in revs:
520 for r in revs:
521 copies = all_copies.pop(r, None)
521 copies = all_copies.pop(r, None)
522 if copies is None:
522 if copies is None:
523 # this is a root
523 # this is a root
524 copies = {}
524 copies = {}
525 for i, c in enumerate(children[r]):
525 for i, c in enumerate(children[r]):
526 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
526 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
527 if r == p1:
527 if r == p1:
528 parent = 1
528 parent = 1
529 childcopies = p1copies
529 childcopies = p1copies
530 else:
530 else:
531 assert r == p2
531 assert r == p2
532 parent = 2
532 parent = 2
533 childcopies = p2copies
533 childcopies = p2copies
534 if not alwaysmatch:
534 if not alwaysmatch:
535 childcopies = {
535 childcopies = {
536 dst: src for dst, src in childcopies.items() if match(dst)
536 dst: src for dst, src in childcopies.items() if match(dst)
537 }
537 }
538 newcopies = copies
538 newcopies = copies
539 if childcopies:
539 if childcopies:
540 newcopies = copies.copy()
540 newcopies = copies.copy()
541 for dest, source in pycompat.iteritems(childcopies):
541 for dest, source in pycompat.iteritems(childcopies):
542 prev = copies.get(source)
542 prev = copies.get(source)
543 if prev is not None and prev[1] is not None:
543 if prev is not None and prev[1] is not None:
544 source = prev[1]
544 source = prev[1]
545 newcopies[dest] = (c, source)
545 newcopies[dest] = (c, source)
546 assert newcopies is not copies
546 assert newcopies is not copies
547 for f in removed:
547 for f in removed:
548 if f in newcopies:
548 if f in newcopies:
549 if newcopies is copies:
549 if newcopies is copies:
550 # copy on write to avoid affecting potential other
550 # copy on write to avoid affecting potential other
551 # branches. when there are no other branches, this
551 # branches. when there are no other branches, this
552 # could be avoided.
552 # could be avoided.
553 newcopies = copies.copy()
553 newcopies = copies.copy()
554 newcopies[f] = (c, None)
554 newcopies[f] = (c, None)
555 othercopies = all_copies.get(c)
555 othercopies = all_copies.get(c)
556 if othercopies is None:
556 if othercopies is None:
557 all_copies[c] = newcopies
557 all_copies[c] = newcopies
558 else:
558 else:
559 # we are the second parent to work on c, we need to merge our
559 # we are the second parent to work on c, we need to merge our
560 # work with the other.
560 # work with the other.
561 #
561 #
562 # In case of conflict, parent 1 take precedence over parent 2.
562 # In case of conflict, parent 1 take precedence over parent 2.
563 # This is an arbitrary choice made anew when implementing
563 # This is an arbitrary choice made anew when implementing
564 # changeset based copies. It was made without regards with
564 # changeset based copies. It was made without regards with
565 # potential filelog related behavior.
565 # potential filelog related behavior.
566 if parent == 1:
566 if parent == 1:
567 _merge_copies_dict_extra(
567 _merge_copies_dict_extra(
568 othercopies, newcopies, isancestor, ismerged
568 othercopies, newcopies, isancestor, ismerged
569 )
569 )
570 else:
570 else:
571 _merge_copies_dict_extra(
571 _merge_copies_dict_extra(
572 newcopies, othercopies, isancestor, ismerged
572 newcopies, othercopies, isancestor, ismerged
573 )
573 )
574 all_copies[c] = newcopies
574 all_copies[c] = newcopies
575
575
576 final_copies = {}
576 final_copies = {}
577 for dest, (tt, source) in all_copies[targetrev].items():
577 for dest, (tt, source) in all_copies[targetrev].items():
578 if source is not None:
578 if source is not None:
579 final_copies[dest] = source
579 final_copies[dest] = source
580 return final_copies
580 return final_copies
581
581
582
582
583 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
583 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
584 """version of `_merge_copies_dict` that works with the Google
584 """version of `_merge_copies_dict` that works with the Google
585 specific "extra" based storage for copy information"""
585 specific "extra" based storage for copy information"""
586 for dest, value in major.items():
586 for dest, value in major.items():
587 other = minor.get(dest)
587 other = minor.get(dest)
588 if other is None:
588 if other is None:
589 minor[dest] = value
589 minor[dest] = value
590 else:
590 else:
591 new_tt = value[0]
591 new_tt = value[0]
592 other_tt = other[0]
592 other_tt = other[0]
593 if value[1] == other[1]:
593 if value[1] == other[1]:
594 continue
594 continue
595 # content from "major" wins, unless it is older
595 # content from "major" wins, unless it is older
596 # than the branch point or there is a merge
596 # than the branch point or there is a merge
597 if (
597 if (
598 new_tt == other_tt
598 new_tt == other_tt
599 or not isancestor(new_tt, other_tt)
599 or not isancestor(new_tt, other_tt)
600 or ismerged(dest)
600 or ismerged(dest)
601 ):
601 ):
602 minor[dest] = value
602 minor[dest] = value
603
603
604
604
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    # restrict the search to files covered by the repository's narrowspec
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace copies up to its first parent, then
    # layer the dirstate copies on top if necessary
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
619
619
620
620
def _backwardrenames(a, b, match):
    """find renames from b back to a, i.e. {src@a: dst@b} reversed

    Copies (where the source still exists in `a`) are dropped; only true
    renames are reported.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for fdst, fsrc in sorted(pycompat.iteritems(forward)):
        if match and not match(fsrc):
            continue
        # remove copies: the source surviving in `a` means it was copied,
        # not renamed
        if fsrc in a:
            continue
        renames[fsrc] = fdst
    return renames
641
641
642
642
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical endpoints or an empty side
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y diverge: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
679
679
680
680
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
749
749
750
750
751 def _isfullcopytraceable(repo, c1, base):
751 def _isfullcopytraceable(repo, c1, base):
752 """Checks that if base, source and destination are all no-public branches,
752 """Checks that if base, source and destination are all no-public branches,
753 if yes let's use the full copytrace algorithm for increased capabilities
753 if yes let's use the full copytrace algorithm for increased capabilities
754 since it will be fast enough.
754 since it will be fast enough.
755
755
756 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
756 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
757 number of changesets from c1 to base such that if number of changesets are
757 number of changesets from c1 to base such that if number of changesets are
758 more than the limit, full copytracing algorithm won't be used.
758 more than the limit, full copytracing algorithm won't be used.
759 """
759 """
760 if c1.rev() is None:
760 if c1.rev() is None:
761 c1 = c1.p1()
761 c1 = c1.p1()
762 if c1.mutable() and base.mutable():
762 if c1.mutable() and base.mutable():
763 sourcecommitlimit = repo.ui.configint(
763 sourcecommitlimit = repo.ui.configint(
764 b'experimental', b'copytrace.sourcecommitlimit'
764 b'experimental', b'copytrace.sourcecommitlimit'
765 )
765 )
766 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
766 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
767 return commits < sourcecommitlimit
767 return commits < sourcecommitlimit
768 return False
768 return False
769
769
770
770
771 def _checksinglesidecopies(
771 def _checksinglesidecopies(
772 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
772 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
773 ):
773 ):
774 if src not in m2:
774 if src not in m2:
775 # deleted on side 2
775 # deleted on side 2
776 if src not in m1:
776 if src not in m1:
777 # renamed on side 1, deleted on side 2
777 # renamed on side 1, deleted on side 2
778 renamedelete[src] = dsts1
778 renamedelete[src] = dsts1
779 elif src not in mb:
779 elif src not in mb:
780 # Work around the "short-circuit to avoid issues with merge states"
780 # Work around the "short-circuit to avoid issues with merge states"
781 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
781 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
782 # destination doesn't exist in y.
782 # destination doesn't exist in y.
783 pass
783 pass
784 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
784 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
785 return
785 return
786 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
786 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
787 # modified on side 2
787 # modified on side 2
788 for dst in dsts1:
788 for dst in dsts1:
789 copy[dst] = src
789 copy[dst] = src
790
790
791
791
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # default each mapping to a fresh empty dict (a mutable default
        # argument would be shared between instances)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n  copy=%r\n  renamedelete=%r\n  dirmove=%r\n  movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
826
826
827
827
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side, nothing to report
        return branch_copies(), branch_copies(), {}

    # invert dst -> src into src -> [dsts] for each side
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests, lazily: computing
    # filesnotin() is expensive and only needed for dir move detection
    # (and debug output), so defer it until first requested
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete1.values():
        renamedeleteset.update(dsts)
    for dsts in renamedelete2.values():
        renamedeleteset.update(dsts)

    repo.ui.debug(
        b"  all copies found (* = to merge, ! = divergent, "
        b"% = renamed and deleted):\n"
    )
    for side, copies in ((b"local", copies1), (b"remote", copies2)):
        if not copies:
            continue
        repo.ui.debug(b"  on %s side:\n" % side)
        for f in sorted(copies):
            note = b""
            if f in copy1 or f in copy2:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
            )
    del renamedeleteset
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
951
966
952
967
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # prune every candidate that turned out to be invalid
    for baddir in invalid:
        dirmove.pop(baddir, None)
    del invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            break

    return dirmove, movewithdir
1015
1031
1016
1032
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # The heuristics work on committed revisions only; substitute the first
    # parent for a working-copy context (whose rev() is None).
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not an ancestor of c2, fall back to the exact algorithm.
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk the linear chain c2..base collecting touched files; a merge
    # commit anywhere on the way disqualifies the heuristics.
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(cursor.files())
        cursor = cursor.p1()

    # Copies recorded on the c2 side whose source file still exists in c1.
    copies2 = {}
    for dst, src in pycompat.iteritems(_forwardcopies(base, c2)):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index the files added on the c1 side by basename and by dirname so
        # rename candidates can be looked up cheaply per missing file.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for added in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(added)].append(added)
            dirnametofilename[os.path.dirname(added)].append(added)

        for f in missingfiles:
            candidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1124
1140
1125
1141
1126 def _related(f1, f2):
1142 def _related(f1, f2):
1127 """return True if f1 and f2 filectx have a common ancestor
1143 """return True if f1 and f2 filectx have a common ancestor
1128
1144
1129 Walk back to common ancestor to see if the two files originate
1145 Walk back to common ancestor to see if the two files originate
1130 from the same file. Since workingfilectx's rev() is None it messes
1146 from the same file. Since workingfilectx's rev() is None it messes
1131 up the integer comparison logic, hence the pre-step check for
1147 up the integer comparison logic, hence the pre-step check for
1132 None (f1 and f2 can only be workingfilectx's initially).
1148 None (f1 and f2 can only be workingfilectx's initially).
1133 """
1149 """
1134
1150
1135 if f1 == f2:
1151 if f1 == f2:
1136 return True # a match
1152 return True # a match
1137
1153
1138 g1, g2 = f1.ancestors(), f2.ancestors()
1154 g1, g2 = f1.ancestors(), f2.ancestors()
1139 try:
1155 try:
1140 f1r, f2r = f1.linkrev(), f2.linkrev()
1156 f1r, f2r = f1.linkrev(), f2.linkrev()
1141
1157
1142 if f1r is None:
1158 if f1r is None:
1143 f1 = next(g1)
1159 f1 = next(g1)
1144 if f2r is None:
1160 if f2r is None:
1145 f2 = next(g2)
1161 f2 = next(g2)
1146
1162
1147 while True:
1163 while True:
1148 f1r, f2r = f1.linkrev(), f2.linkrev()
1164 f1r, f2r = f1.linkrev(), f2.linkrev()
1149 if f1r > f2r:
1165 if f1r > f2r:
1150 f1 = next(g1)
1166 f1 = next(g1)
1151 elif f2r > f1r:
1167 elif f2r > f1r:
1152 f2 = next(g2)
1168 f2 = next(g2)
1153 else: # f1 and f2 point to files in the same linkrev
1169 else: # f1 and f2 point to files in the same linkrev
1154 return f1 == f2 # true if they point to the same file
1170 return f1 == f2 # true if they point to the same file
1155 except StopIteration:
1171 except StopIteration:
1156 return False
1172 return False
1157
1173
1158
1174
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # All copies recorded between base and ctx.
    found = pathcopies(base, ctx)
    # Drop entries that _filter() deems invalid for the wctx.p1() -> wctx
    # transition before applying them.
    _filter(wctx.p1(), wctx, found)
    # Record each surviving copy directly on the working-copy file contexts.
    for dest, source in pycompat.iteritems(found):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now