##// END OF EJS Templates
copies: clarify which case some conditional are handling...
marmoute -
r47127:154ded91 default
parent child Browse files
Show More
@@ -1,1231 +1,1231
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src: # case 5
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst: # case 1
70 del t[k]
70 del t[k]
71
71
72
72
73 def _chain(prefix, suffix):
73 def _chain(prefix, suffix):
74 """chain two sets of copies 'prefix' and 'suffix'"""
74 """chain two sets of copies 'prefix' and 'suffix'"""
75 result = prefix.copy()
75 result = prefix.copy()
76 for key, value in pycompat.iteritems(suffix):
76 for key, value in pycompat.iteritems(suffix):
77 result[key] = prefix.get(value, value)
77 result[key] = prefix.get(value, value)
78 return result
78 return result
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
116 def usechangesetcentricalgo(repo):
116 def usechangesetcentricalgo(repo):
117 """Checks if we should use changeset-centric copy algorithms"""
117 """Checks if we should use changeset-centric copy algorithms"""
118 if repo.filecopiesmode == b'changeset-sidedata':
118 if repo.filecopiesmode == b'changeset-sidedata':
119 return True
119 return True
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 changesetsource = (b'changeset-only', b'compatibility')
121 changesetsource = (b'changeset-only', b'compatibility')
122 return readfrom in changesetsource
122 return readfrom in changesetsource
123
123
124
124
125 def _committedforwardcopies(a, b, base, match):
125 def _committedforwardcopies(a, b, base, match):
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 # files might have to be traced back to the fctx parent of the last
127 # files might have to be traced back to the fctx parent of the last
128 # one-side-only changeset, but not further back than that
128 # one-side-only changeset, but not further back than that
129 repo = a._repo
129 repo = a._repo
130
130
131 if usechangesetcentricalgo(repo):
131 if usechangesetcentricalgo(repo):
132 return _changesetforwardcopies(a, b, match)
132 return _changesetforwardcopies(a, b, match)
133
133
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 dbg = repo.ui.debug
135 dbg = repo.ui.debug
136 if debug:
136 if debug:
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 am = a.manifest()
138 am = a.manifest()
139 basemf = None if base is None else base.manifest()
139 basemf = None if base is None else base.manifest()
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145
145
146 # Computing the forward missing is quite expensive on large manifests, since
146 # Computing the forward missing is quite expensive on large manifests, since
147 # it compares the entire manifests. We can optimize it in the common use
147 # it compares the entire manifests. We can optimize it in the common use
148 # case of computing what copies are in a commit versus its parent (like
148 # case of computing what copies are in a commit versus its parent (like
149 # during a rebase or histedit). Note, we exclude merge commits from this
149 # during a rebase or histedit). Note, we exclude merge commits from this
150 # optimization, since the ctx.files() for a merge commit is not correct for
150 # optimization, since the ctx.files() for a merge commit is not correct for
151 # this comparison.
151 # this comparison.
152 forwardmissingmatch = match
152 forwardmissingmatch = match
153 if b.p1() == a and b.p2().node() == nullid:
153 if b.p1() == a and b.p2().node() == nullid:
154 filesmatcher = matchmod.exact(b.files())
154 filesmatcher = matchmod.exact(b.files())
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157
157
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159
159
160 if debug:
160 if debug:
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162
162
163 for f in sorted(missing):
163 for f in sorted(missing):
164 if debug:
164 if debug:
165 dbg(b'debug.copies: tracing file: %s\n' % f)
165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 fctx = b[f]
166 fctx = b[f]
167 fctx._ancestrycontext = ancestrycontext
167 fctx._ancestrycontext = ancestrycontext
168
168
169 if debug:
169 if debug:
170 start = util.timer()
170 start = util.timer()
171 opath = _tracefile(fctx, am, basemf)
171 opath = _tracefile(fctx, am, basemf)
172 if opath:
172 if opath:
173 if debug:
173 if debug:
174 dbg(b'debug.copies: rename of: %s\n' % opath)
174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 cm[f] = opath
175 cm[f] = opath
176 if debug:
176 if debug:
177 dbg(
177 dbg(
178 b'debug.copies: time: %f seconds\n'
178 b'debug.copies: time: %f seconds\n'
179 % (util.timer() - start)
179 % (util.timer() - start)
180 )
180 )
181 return cm
181 return cm
182
182
183
183
184 def _revinfo_getter(repo, match):
184 def _revinfo_getter(repo, match):
185 """returns a function that returns the following data given a <rev>"
185 """returns a function that returns the following data given a <rev>"
186
186
187 * p1: revision number of first parent
187 * p1: revision number of first parent
188 * p2: revision number of first parent
188 * p2: revision number of first parent
189 * changes: a ChangingFiles object
189 * changes: a ChangingFiles object
190 """
190 """
191 cl = repo.changelog
191 cl = repo.changelog
192 parents = cl.parentrevs
192 parents = cl.parentrevs
193 flags = cl.flags
193 flags = cl.flags
194
194
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196
196
197 changelogrevision = cl.changelogrevision
197 changelogrevision = cl.changelogrevision
198
198
199 if rustmod is not None:
199 if rustmod is not None:
200
200
201 def revinfo(rev):
201 def revinfo(rev):
202 p1, p2 = parents(rev)
202 p1, p2 = parents(rev)
203 if flags(rev) & HASCOPIESINFO:
203 if flags(rev) & HASCOPIESINFO:
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
205 else:
205 else:
206 raw = None
206 raw = None
207 return (p1, p2, raw)
207 return (p1, p2, raw)
208
208
209 else:
209 else:
210
210
211 def revinfo(rev):
211 def revinfo(rev):
212 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
213 if flags(rev) & HASCOPIESINFO:
213 if flags(rev) & HASCOPIESINFO:
214 changes = changelogrevision(rev).changes
214 changes = changelogrevision(rev).changes
215 else:
215 else:
216 changes = None
216 changes = None
217 return (p1, p2, changes)
217 return (p1, p2, changes)
218
218
219 return revinfo
219 return revinfo
220
220
221
221
222 def cached_is_ancestor(is_ancestor):
222 def cached_is_ancestor(is_ancestor):
223 """return a cached version of is_ancestor"""
223 """return a cached version of is_ancestor"""
224 cache = {}
224 cache = {}
225
225
226 def _is_ancestor(anc, desc):
226 def _is_ancestor(anc, desc):
227 if anc > desc:
227 if anc > desc:
228 return False
228 return False
229 elif anc == desc:
229 elif anc == desc:
230 return True
230 return True
231 key = (anc, desc)
231 key = (anc, desc)
232 ret = cache.get(key)
232 ret = cache.get(key)
233 if ret is None:
233 if ret is None:
234 ret = cache[key] = is_ancestor(anc, desc)
234 ret = cache[key] = is_ancestor(anc, desc)
235 return ret
235 return ret
236
236
237 return _is_ancestor
237 return _is_ancestor
238
238
239
239
240 def _changesetforwardcopies(a, b, match):
240 def _changesetforwardcopies(a, b, match):
241 if a.rev() in (nullrev, b.rev()):
241 if a.rev() in (nullrev, b.rev()):
242 return {}
242 return {}
243
243
244 repo = a.repo().unfiltered()
244 repo = a.repo().unfiltered()
245 children = {}
245 children = {}
246
246
247 cl = repo.changelog
247 cl = repo.changelog
248 isancestor = cl.isancestorrev
248 isancestor = cl.isancestorrev
249
249
250 # To track rename from "A" to B, we need to gather all parent β†’ children
250 # To track rename from "A" to B, we need to gather all parent β†’ children
251 # edges that are contains in `::B` but not in `::A`.
251 # edges that are contains in `::B` but not in `::A`.
252 #
252 #
253 #
253 #
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
256 # that belong to ::a. These are exactly all the revisions needed to express
256 # that belong to ::a. These are exactly all the revisions needed to express
257 # the parent β†’ children we need to combine.
257 # the parent β†’ children we need to combine.
258 #
258 #
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
260 # excluding paths that leads to roots that are not ancestors of `a`. We
260 # excluding paths that leads to roots that are not ancestors of `a`. We
261 # keep this out of the explanation because it is hard enough without this special case..
261 # keep this out of the explanation because it is hard enough without this special case..
262
262
263 parents = cl._uncheckedparentrevs
263 parents = cl._uncheckedparentrevs
264 graph_roots = (nullrev, nullrev)
264 graph_roots = (nullrev, nullrev)
265
265
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
268 roots = set()
268 roots = set()
269 has_graph_roots = False
269 has_graph_roots = False
270
270
271 # iterate over `only(B, A)`
271 # iterate over `only(B, A)`
272 for r in revs:
272 for r in revs:
273 ps = parents(r)
273 ps = parents(r)
274 if ps == graph_roots:
274 if ps == graph_roots:
275 has_graph_roots = True
275 has_graph_roots = True
276 else:
276 else:
277 p1, p2 = ps
277 p1, p2 = ps
278
278
279 # find all the "root points" (see larger comment above)
279 # find all the "root points" (see larger comment above)
280 if p1 != nullrev and p1 in ancestors:
280 if p1 != nullrev and p1 in ancestors:
281 roots.add(p1)
281 roots.add(p1)
282 if p2 != nullrev and p2 in ancestors:
282 if p2 != nullrev and p2 in ancestors:
283 roots.add(p2)
283 roots.add(p2)
284 if not roots:
284 if not roots:
285 # no common revision to track copies from
285 # no common revision to track copies from
286 return {}
286 return {}
287 if has_graph_roots:
287 if has_graph_roots:
288 # this deal with the special case mentionned in the [1] footnotes. We
288 # this deal with the special case mentionned in the [1] footnotes. We
289 # must filter out revisions that leads to non-common graphroots.
289 # must filter out revisions that leads to non-common graphroots.
290 roots = list(roots)
290 roots = list(roots)
291 m = min(roots)
291 m = min(roots)
292 h = [b.rev()]
292 h = [b.rev()]
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
294 roots_to_head = set(roots_to_head)
294 roots_to_head = set(roots_to_head)
295 revs = [r for r in revs if r in roots_to_head]
295 revs = [r for r in revs if r in roots_to_head]
296
296
297 if repo.filecopiesmode == b'changeset-sidedata':
297 if repo.filecopiesmode == b'changeset-sidedata':
298 # When using side-data, we will process the edges "from" the children.
298 # When using side-data, we will process the edges "from" the children.
299 # We iterate over the childre, gathering previous collected data for
299 # We iterate over the childre, gathering previous collected data for
300 # the parents. Do know when the parents data is no longer necessary, we
300 # the parents. Do know when the parents data is no longer necessary, we
301 # keep a counter of how many children each revision has.
301 # keep a counter of how many children each revision has.
302 #
302 #
303 # An interresting property of `children_count` is that it only contains
303 # An interresting property of `children_count` is that it only contains
304 # revision that will be relevant for a edge of the graph. So if a
304 # revision that will be relevant for a edge of the graph. So if a
305 # children has parent not in `children_count`, that edges should not be
305 # children has parent not in `children_count`, that edges should not be
306 # processed.
306 # processed.
307 children_count = dict((r, 0) for r in roots)
307 children_count = dict((r, 0) for r in roots)
308 for r in revs:
308 for r in revs:
309 for p in cl.parentrevs(r):
309 for p in cl.parentrevs(r):
310 if p == nullrev:
310 if p == nullrev:
311 continue
311 continue
312 children_count[r] = 0
312 children_count[r] = 0
313 if p in children_count:
313 if p in children_count:
314 children_count[p] += 1
314 children_count[p] += 1
315 revinfo = _revinfo_getter(repo, match)
315 revinfo = _revinfo_getter(repo, match)
316 return _combine_changeset_copies(
316 return _combine_changeset_copies(
317 revs, children_count, b.rev(), revinfo, match, isancestor
317 revs, children_count, b.rev(), revinfo, match, isancestor
318 )
318 )
319 else:
319 else:
320 # When not using side-data, we will process the edges "from" the parent.
320 # When not using side-data, we will process the edges "from" the parent.
321 # so we need a full mapping of the parent -> children relation.
321 # so we need a full mapping of the parent -> children relation.
322 children = dict((r, []) for r in roots)
322 children = dict((r, []) for r in roots)
323 for r in revs:
323 for r in revs:
324 for p in cl.parentrevs(r):
324 for p in cl.parentrevs(r):
325 if p == nullrev:
325 if p == nullrev:
326 continue
326 continue
327 children[r] = []
327 children[r] = []
328 if p in children:
328 if p in children:
329 children[p].append(r)
329 children[p].append(r)
330 x = revs.pop()
330 x = revs.pop()
331 assert x == b.rev()
331 assert x == b.rev()
332 revs.extend(roots)
332 revs.extend(roots)
333 revs.sort()
333 revs.sort()
334
334
335 revinfo = _revinfo_getter_extra(repo)
335 revinfo = _revinfo_getter_extra(repo)
336 return _combine_changeset_copies_extra(
336 return _combine_changeset_copies_extra(
337 revs, children, b.rev(), revinfo, match, isancestor
337 revs, children, b.rev(), revinfo, match, isancestor
338 )
338 )
339
339
340
340
341 def _combine_changeset_copies(
341 def _combine_changeset_copies(
342 revs, children_count, targetrev, revinfo, match, isancestor
342 revs, children_count, targetrev, revinfo, match, isancestor
343 ):
343 ):
344 """combine the copies information for each item of iterrevs
344 """combine the copies information for each item of iterrevs
345
345
346 revs: sorted iterable of revision to visit
346 revs: sorted iterable of revision to visit
347 children_count: a {parent: <number-of-relevant-children>} mapping.
347 children_count: a {parent: <number-of-relevant-children>} mapping.
348 targetrev: the final copies destination revision (not in iterrevs)
348 targetrev: the final copies destination revision (not in iterrevs)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
350 match: a matcher
350 match: a matcher
351
351
352 It returns the aggregated copies information for `targetrev`.
352 It returns the aggregated copies information for `targetrev`.
353 """
353 """
354
354
355 alwaysmatch = match.always()
355 alwaysmatch = match.always()
356
356
357 if rustmod is not None:
357 if rustmod is not None:
358 final_copies = rustmod.combine_changeset_copies(
358 final_copies = rustmod.combine_changeset_copies(
359 list(revs), children_count, targetrev, revinfo, isancestor
359 list(revs), children_count, targetrev, revinfo, isancestor
360 )
360 )
361 else:
361 else:
362 isancestor = cached_is_ancestor(isancestor)
362 isancestor = cached_is_ancestor(isancestor)
363
363
364 all_copies = {}
364 all_copies = {}
365 # iterate over all the "children" side of copy tracing "edge"
365 # iterate over all the "children" side of copy tracing "edge"
366 for current_rev in revs:
366 for current_rev in revs:
367 p1, p2, changes = revinfo(current_rev)
367 p1, p2, changes = revinfo(current_rev)
368 current_copies = None
368 current_copies = None
369 # iterate over all parents to chain the existing data with the
369 # iterate over all parents to chain the existing data with the
370 # data from the parent β†’ child edge.
370 # data from the parent β†’ child edge.
371 for parent, parent_rev in ((1, p1), (2, p2)):
371 for parent, parent_rev in ((1, p1), (2, p2)):
372 if parent_rev == nullrev:
372 if parent_rev == nullrev:
373 continue
373 continue
374 remaining_children = children_count.get(parent_rev)
374 remaining_children = children_count.get(parent_rev)
375 if remaining_children is None:
375 if remaining_children is None:
376 continue
376 continue
377 remaining_children -= 1
377 remaining_children -= 1
378 children_count[parent_rev] = remaining_children
378 children_count[parent_rev] = remaining_children
379 if remaining_children:
379 if remaining_children:
380 copies = all_copies.get(parent_rev, None)
380 copies = all_copies.get(parent_rev, None)
381 else:
381 else:
382 copies = all_copies.pop(parent_rev, None)
382 copies = all_copies.pop(parent_rev, None)
383
383
384 if copies is None:
384 if copies is None:
385 # this is a root
385 # this is a root
386 newcopies = copies = {}
386 newcopies = copies = {}
387 elif remaining_children:
387 elif remaining_children:
388 newcopies = copies.copy()
388 newcopies = copies.copy()
389 else:
389 else:
390 newcopies = copies
390 newcopies = copies
391 # chain the data in the edge with the existing data
391 # chain the data in the edge with the existing data
392 if changes is not None:
392 if changes is not None:
393 childcopies = {}
393 childcopies = {}
394 if parent == 1:
394 if parent == 1:
395 childcopies = changes.copied_from_p1
395 childcopies = changes.copied_from_p1
396 elif parent == 2:
396 elif parent == 2:
397 childcopies = changes.copied_from_p2
397 childcopies = changes.copied_from_p2
398
398
399 if childcopies:
399 if childcopies:
400 newcopies = copies.copy()
400 newcopies = copies.copy()
401 for dest, source in pycompat.iteritems(childcopies):
401 for dest, source in pycompat.iteritems(childcopies):
402 prev = copies.get(source)
402 prev = copies.get(source)
403 if prev is not None and prev[1] is not None:
403 if prev is not None and prev[1] is not None:
404 source = prev[1]
404 source = prev[1]
405 newcopies[dest] = (current_rev, source)
405 newcopies[dest] = (current_rev, source)
406 assert newcopies is not copies
406 assert newcopies is not copies
407 if changes.removed:
407 if changes.removed:
408 for f in changes.removed:
408 for f in changes.removed:
409 if f in newcopies:
409 if f in newcopies:
410 if newcopies is copies:
410 if newcopies is copies:
411 # copy on write to avoid affecting potential other
411 # copy on write to avoid affecting potential other
412 # branches. when there are no other branches, this
412 # branches. when there are no other branches, this
413 # could be avoided.
413 # could be avoided.
414 newcopies = copies.copy()
414 newcopies = copies.copy()
415 newcopies[f] = (current_rev, None)
415 newcopies[f] = (current_rev, None)
416 # check potential need to combine the data from another parent (for
416 # check potential need to combine the data from another parent (for
417 # that child). See comment below for details.
417 # that child). See comment below for details.
418 if current_copies is None:
418 if current_copies is None:
419 current_copies = newcopies
419 current_copies = newcopies
420 else:
420 else:
421 # we are the second parent to work on c, we need to merge our
421 # we are the second parent to work on c, we need to merge our
422 # work with the other.
422 # work with the other.
423 #
423 #
424 # In case of conflict, parent 1 take precedence over parent 2.
424 # In case of conflict, parent 1 take precedence over parent 2.
425 # This is an arbitrary choice made anew when implementing
425 # This is an arbitrary choice made anew when implementing
426 # changeset based copies. It was made without regards with
426 # changeset based copies. It was made without regards with
427 # potential filelog related behavior.
427 # potential filelog related behavior.
428 assert parent == 2
428 assert parent == 2
429 current_copies = _merge_copies_dict(
429 current_copies = _merge_copies_dict(
430 newcopies, current_copies, isancestor, changes
430 newcopies, current_copies, isancestor, changes
431 )
431 )
432 all_copies[current_rev] = current_copies
432 all_copies[current_rev] = current_copies
433
433
434 # filter out internal details and return a {dest: source mapping}
434 # filter out internal details and return a {dest: source mapping}
435 final_copies = {}
435 final_copies = {}
436 for dest, (tt, source) in all_copies[targetrev].items():
436 for dest, (tt, source) in all_copies[targetrev].items():
437 if source is not None:
437 if source is not None:
438 final_copies[dest] = source
438 final_copies[dest] = source
439 if not alwaysmatch:
439 if not alwaysmatch:
440 for filename in list(final_copies.keys()):
440 for filename in list(final_copies.keys()):
441 if not match(filename):
441 if not match(filename):
442 del final_copies[filename]
442 del final_copies[filename]
443 return final_copies
443 return final_copies
444
444
445
445
446 # constant to decide which side to pick with _merge_copies_dict
446 # constant to decide which side to pick with _merge_copies_dict
447 PICK_MINOR = 0
447 PICK_MINOR = 0
448 PICK_MAJOR = 1
448 PICK_MAJOR = 1
449 PICK_EITHER = 2
449 PICK_EITHER = 2
450
450
451
451
452 def _merge_copies_dict(minor, major, isancestor, changes):
452 def _merge_copies_dict(minor, major, isancestor, changes):
453 """merge two copies-mapping together, minor and major
453 """merge two copies-mapping together, minor and major
454
454
455 In case of conflict, value from "major" will be picked.
455 In case of conflict, value from "major" will be picked.
456
456
457 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
457 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
458 ancestors of `high_rev`,
458 ancestors of `high_rev`,
459
459
460 - `ismerged(path)`: callable return True if `path` have been merged in the
460 - `ismerged(path)`: callable return True if `path` have been merged in the
461 current revision,
461 current revision,
462
462
463 return the resulting dict (in practice, the "minor" object, updated)
463 return the resulting dict (in practice, the "minor" object, updated)
464 """
464 """
465 for dest, value in major.items():
465 for dest, value in major.items():
466 other = minor.get(dest)
466 other = minor.get(dest)
467 if other is None:
467 if other is None:
468 minor[dest] = value
468 minor[dest] = value
469 else:
469 else:
470 pick = _compare_values(changes, isancestor, dest, other, value)
470 pick = _compare_values(changes, isancestor, dest, other, value)
471 if pick == PICK_MAJOR:
471 if pick == PICK_MAJOR:
472 minor[dest] = value
472 minor[dest] = value
473 return minor
473 return minor
474
474
475
475
476 def _compare_values(changes, isancestor, dest, minor, major):
476 def _compare_values(changes, isancestor, dest, minor, major):
477 """compare two value within a _merge_copies_dict loop iteration"""
477 """compare two value within a _merge_copies_dict loop iteration"""
478 major_tt, major_value = major
478 major_tt, major_value = major
479 minor_tt, minor_value = minor
479 minor_tt, minor_value = minor
480
480
481 # evacuate some simple case first:
481 # evacuate some simple case first:
482 if major_tt == minor_tt:
482 if major_tt == minor_tt:
483 # if it comes from the same revision it must be the same value
483 # if it comes from the same revision it must be the same value
484 assert major_value == minor_value
484 assert major_value == minor_value
485 return PICK_EITHER
485 return PICK_EITHER
486 elif major[1] == minor[1]:
486 elif major[1] == minor[1]:
487 return PICK_EITHER
487 return PICK_EITHER
488
488
489 # actual merging needed: content from "major" wins, unless it is older than
489 # actual merging needed: content from "major" wins, unless it is older than
490 # the branch point or there is a merge
490 # the branch point or there is a merge
491 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 elif changes is not None and major[1] is None and dest in changes.salvaged:
492 return PICK_MINOR
492 return PICK_MINOR
493 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 elif changes is not None and minor[1] is None and dest in changes.salvaged:
494 return PICK_MAJOR
494 return PICK_MAJOR
495 elif changes is not None and dest in changes.merged:
495 elif changes is not None and dest in changes.merged:
496 return PICK_MAJOR
496 return PICK_MAJOR
497 elif not isancestor(major_tt, minor_tt):
497 elif not isancestor(major_tt, minor_tt):
498 if major[1] is not None:
498 if major[1] is not None:
499 return PICK_MAJOR
499 return PICK_MAJOR
500 elif isancestor(minor_tt, major_tt):
500 elif isancestor(minor_tt, major_tt):
501 return PICK_MAJOR
501 return PICK_MAJOR
502 return PICK_MINOR
502 return PICK_MINOR
503
503
504
504
505 def _revinfo_getter_extra(repo):
505 def _revinfo_getter_extra(repo):
506 """return a function that return multiple data given a <rev>"i
506 """return a function that return multiple data given a <rev>"i
507
507
508 * p1: revision number of first parent
508 * p1: revision number of first parent
509 * p2: revision number of first parent
509 * p2: revision number of first parent
510 * p1copies: mapping of copies from p1
510 * p1copies: mapping of copies from p1
511 * p2copies: mapping of copies from p2
511 * p2copies: mapping of copies from p2
512 * removed: a list of removed files
512 * removed: a list of removed files
513 * ismerged: a callback to know if file was merged in that revision
513 * ismerged: a callback to know if file was merged in that revision
514 """
514 """
515 cl = repo.changelog
515 cl = repo.changelog
516 parents = cl.parentrevs
516 parents = cl.parentrevs
517
517
518 def get_ismerged(rev):
518 def get_ismerged(rev):
519 ctx = repo[rev]
519 ctx = repo[rev]
520
520
521 def ismerged(path):
521 def ismerged(path):
522 if path not in ctx.files():
522 if path not in ctx.files():
523 return False
523 return False
524 fctx = ctx[path]
524 fctx = ctx[path]
525 parents = fctx._filelog.parents(fctx._filenode)
525 parents = fctx._filelog.parents(fctx._filenode)
526 nb_parents = 0
526 nb_parents = 0
527 for n in parents:
527 for n in parents:
528 if n != nullid:
528 if n != nullid:
529 nb_parents += 1
529 nb_parents += 1
530 return nb_parents >= 2
530 return nb_parents >= 2
531
531
532 return ismerged
532 return ismerged
533
533
534 def revinfo(rev):
534 def revinfo(rev):
535 p1, p2 = parents(rev)
535 p1, p2 = parents(rev)
536 ctx = repo[rev]
536 ctx = repo[rev]
537 p1copies, p2copies = ctx._copies
537 p1copies, p2copies = ctx._copies
538 removed = ctx.filesremoved()
538 removed = ctx.filesremoved()
539 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
539 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
540
540
541 return revinfo
541 return revinfo
542
542
543
543
544 def _combine_changeset_copies_extra(
544 def _combine_changeset_copies_extra(
545 revs, children, targetrev, revinfo, match, isancestor
545 revs, children, targetrev, revinfo, match, isancestor
546 ):
546 ):
547 """version of `_combine_changeset_copies` that works with the Google
547 """version of `_combine_changeset_copies` that works with the Google
548 specific "extra" based storage for copy information"""
548 specific "extra" based storage for copy information"""
549 all_copies = {}
549 all_copies = {}
550 alwaysmatch = match.always()
550 alwaysmatch = match.always()
551 for r in revs:
551 for r in revs:
552 copies = all_copies.pop(r, None)
552 copies = all_copies.pop(r, None)
553 if copies is None:
553 if copies is None:
554 # this is a root
554 # this is a root
555 copies = {}
555 copies = {}
556 for i, c in enumerate(children[r]):
556 for i, c in enumerate(children[r]):
557 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
557 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
558 if r == p1:
558 if r == p1:
559 parent = 1
559 parent = 1
560 childcopies = p1copies
560 childcopies = p1copies
561 else:
561 else:
562 assert r == p2
562 assert r == p2
563 parent = 2
563 parent = 2
564 childcopies = p2copies
564 childcopies = p2copies
565 if not alwaysmatch:
565 if not alwaysmatch:
566 childcopies = {
566 childcopies = {
567 dst: src for dst, src in childcopies.items() if match(dst)
567 dst: src for dst, src in childcopies.items() if match(dst)
568 }
568 }
569 newcopies = copies
569 newcopies = copies
570 if childcopies:
570 if childcopies:
571 newcopies = copies.copy()
571 newcopies = copies.copy()
572 for dest, source in pycompat.iteritems(childcopies):
572 for dest, source in pycompat.iteritems(childcopies):
573 prev = copies.get(source)
573 prev = copies.get(source)
574 if prev is not None and prev[1] is not None:
574 if prev is not None and prev[1] is not None:
575 source = prev[1]
575 source = prev[1]
576 newcopies[dest] = (c, source)
576 newcopies[dest] = (c, source)
577 assert newcopies is not copies
577 assert newcopies is not copies
578 for f in removed:
578 for f in removed:
579 if f in newcopies:
579 if f in newcopies:
580 if newcopies is copies:
580 if newcopies is copies:
581 # copy on write to avoid affecting potential other
581 # copy on write to avoid affecting potential other
582 # branches. when there are no other branches, this
582 # branches. when there are no other branches, this
583 # could be avoided.
583 # could be avoided.
584 newcopies = copies.copy()
584 newcopies = copies.copy()
585 newcopies[f] = (c, None)
585 newcopies[f] = (c, None)
586 othercopies = all_copies.get(c)
586 othercopies = all_copies.get(c)
587 if othercopies is None:
587 if othercopies is None:
588 all_copies[c] = newcopies
588 all_copies[c] = newcopies
589 else:
589 else:
590 # we are the second parent to work on c, we need to merge our
590 # we are the second parent to work on c, we need to merge our
591 # work with the other.
591 # work with the other.
592 #
592 #
593 # In case of conflict, parent 1 take precedence over parent 2.
593 # In case of conflict, parent 1 take precedence over parent 2.
594 # This is an arbitrary choice made anew when implementing
594 # This is an arbitrary choice made anew when implementing
595 # changeset based copies. It was made without regards with
595 # changeset based copies. It was made without regards with
596 # potential filelog related behavior.
596 # potential filelog related behavior.
597 if parent == 1:
597 if parent == 1:
598 _merge_copies_dict_extra(
598 _merge_copies_dict_extra(
599 othercopies, newcopies, isancestor, ismerged
599 othercopies, newcopies, isancestor, ismerged
600 )
600 )
601 else:
601 else:
602 _merge_copies_dict_extra(
602 _merge_copies_dict_extra(
603 newcopies, othercopies, isancestor, ismerged
603 newcopies, othercopies, isancestor, ismerged
604 )
604 )
605 all_copies[c] = newcopies
605 all_copies[c] = newcopies
606
606
607 final_copies = {}
607 final_copies = {}
608 for dest, (tt, source) in all_copies[targetrev].items():
608 for dest, (tt, source) in all_copies[targetrev].items():
609 if source is not None:
609 if source is not None:
610 final_copies[dest] = source
610 final_copies[dest] = source
611 return final_copies
611 return final_copies
612
612
613
613
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
615 """version of `_merge_copies_dict` that works with the Google
615 """version of `_merge_copies_dict` that works with the Google
616 specific "extra" based storage for copy information"""
616 specific "extra" based storage for copy information"""
617 for dest, value in major.items():
617 for dest, value in major.items():
618 other = minor.get(dest)
618 other = minor.get(dest)
619 if other is None:
619 if other is None:
620 minor[dest] = value
620 minor[dest] = value
621 else:
621 else:
622 new_tt = value[0]
622 new_tt = value[0]
623 other_tt = other[0]
623 other_tt = other[0]
624 if value[1] == other[1]:
624 if value[1] == other[1]:
625 continue
625 continue
626 # content from "major" wins, unless it is older
626 # content from "major" wins, unless it is older
627 # than the branch point or there is a merge
627 # than the branch point or there is a merge
628 if (
628 if (
629 new_tt == other_tt
629 new_tt == other_tt
630 or not isancestor(new_tt, other_tt)
630 or not isancestor(new_tt, other_tt)
631 or ismerged(dest)
631 or ismerged(dest)
632 ):
632 ):
633 minor[dest] = value
633 minor[dest] = value
634
634
635
635
636 def _forwardcopies(a, b, base=None, match=None):
636 def _forwardcopies(a, b, base=None, match=None):
637 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
637 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
638
638
639 if base is None:
639 if base is None:
640 base = a
640 base = a
641 match = a.repo().narrowmatch(match)
641 match = a.repo().narrowmatch(match)
642 # check for working copy
642 # check for working copy
643 if b.rev() is None:
643 if b.rev() is None:
644 cm = _committedforwardcopies(a, b.p1(), base, match)
644 cm = _committedforwardcopies(a, b.p1(), base, match)
645 # combine copies from dirstate if necessary
645 # combine copies from dirstate if necessary
646 copies = _chain(cm, _dirstatecopies(b._repo, match))
646 copies = _chain(cm, _dirstatecopies(b._repo, match))
647 else:
647 else:
648 copies = _committedforwardcopies(a, b, base, match)
648 copies = _committedforwardcopies(a, b, base, match)
649 return copies
649 return copies
650
650
651
651
652 def _backwardrenames(a, b, match):
652 def _backwardrenames(a, b, match):
653 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
653 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
654 return {}
654 return {}
655
655
656 # Even though we're not taking copies into account, 1:n rename situations
656 # Even though we're not taking copies into account, 1:n rename situations
657 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
657 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
658 # arbitrarily pick one of the renames.
658 # arbitrarily pick one of the renames.
659 # We don't want to pass in "match" here, since that would filter
659 # We don't want to pass in "match" here, since that would filter
660 # the destination by it. Since we're reversing the copies, we want
660 # the destination by it. Since we're reversing the copies, we want
661 # to filter the source instead.
661 # to filter the source instead.
662 f = _forwardcopies(b, a)
662 f = _forwardcopies(b, a)
663 r = {}
663 r = {}
664 for k, v in sorted(pycompat.iteritems(f)):
664 for k, v in sorted(pycompat.iteritems(f)):
665 if match and not match(v):
665 if match and not match(v):
666 continue
666 continue
667 # remove copies
667 # remove copies
668 if v in a:
668 if v in a:
669 continue
669 continue
670 r[v] = k
670 r[v] = k
671 return r
671 return r
672
672
673
673
674 def pathcopies(x, y, match=None):
674 def pathcopies(x, y, match=None):
675 """find {dst@y: src@x} copy mapping for directed compare"""
675 """find {dst@y: src@x} copy mapping for directed compare"""
676 repo = x._repo
676 repo = x._repo
677 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
677 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
678 if debug:
678 if debug:
679 repo.ui.debug(
679 repo.ui.debug(
680 b'debug.copies: searching copies from %s to %s\n' % (x, y)
680 b'debug.copies: searching copies from %s to %s\n' % (x, y)
681 )
681 )
682 if x == y or not x or not y:
682 if x == y or not x or not y:
683 return {}
683 return {}
684 if y.rev() is None and x == y.p1():
684 if y.rev() is None and x == y.p1():
685 if debug:
685 if debug:
686 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
686 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
687 # short-circuit to avoid issues with merge states
687 # short-circuit to avoid issues with merge states
688 return _dirstatecopies(repo, match)
688 return _dirstatecopies(repo, match)
689 a = y.ancestor(x)
689 a = y.ancestor(x)
690 if a == x:
690 if a == x:
691 if debug:
691 if debug:
692 repo.ui.debug(b'debug.copies: search mode: forward\n')
692 repo.ui.debug(b'debug.copies: search mode: forward\n')
693 copies = _forwardcopies(x, y, match=match)
693 copies = _forwardcopies(x, y, match=match)
694 elif a == y:
694 elif a == y:
695 if debug:
695 if debug:
696 repo.ui.debug(b'debug.copies: search mode: backward\n')
696 repo.ui.debug(b'debug.copies: search mode: backward\n')
697 copies = _backwardrenames(x, y, match=match)
697 copies = _backwardrenames(x, y, match=match)
698 else:
698 else:
699 if debug:
699 if debug:
700 repo.ui.debug(b'debug.copies: search mode: combined\n')
700 repo.ui.debug(b'debug.copies: search mode: combined\n')
701 base = None
701 base = None
702 if a.rev() != nullrev:
702 if a.rev() != nullrev:
703 base = x
703 base = x
704 copies = _chain(
704 copies = _chain(
705 _backwardrenames(x, a, match=match),
705 _backwardrenames(x, a, match=match),
706 _forwardcopies(a, y, base, match=match),
706 _forwardcopies(a, y, base, match=match),
707 )
707 )
708 _filter(x, y, copies)
708 _filter(x, y, copies)
709 return copies
709 return copies
710
710
711
711
712 def mergecopies(repo, c1, c2, base):
712 def mergecopies(repo, c1, c2, base):
713 """
713 """
714 Finds moves and copies between context c1 and c2 that are relevant for
714 Finds moves and copies between context c1 and c2 that are relevant for
715 merging. 'base' will be used as the merge base.
715 merging. 'base' will be used as the merge base.
716
716
717 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
717 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
718 files that were moved/ copied in one merge parent and modified in another.
718 files that were moved/ copied in one merge parent and modified in another.
719 For example:
719 For example:
720
720
721 o ---> 4 another commit
721 o ---> 4 another commit
722 |
722 |
723 | o ---> 3 commit that modifies a.txt
723 | o ---> 3 commit that modifies a.txt
724 | /
724 | /
725 o / ---> 2 commit that moves a.txt to b.txt
725 o / ---> 2 commit that moves a.txt to b.txt
726 |/
726 |/
727 o ---> 1 merge base
727 o ---> 1 merge base
728
728
729 If we try to rebase revision 3 on revision 4, since there is no a.txt in
729 If we try to rebase revision 3 on revision 4, since there is no a.txt in
730 revision 4, and if user have copytrace disabled, we prints the following
730 revision 4, and if user have copytrace disabled, we prints the following
731 message:
731 message:
732
732
733 ```other changed <file> which local deleted```
733 ```other changed <file> which local deleted```
734
734
735 Returns a tuple where:
735 Returns a tuple where:
736
736
737 "branch_copies" an instance of branch_copies.
737 "branch_copies" an instance of branch_copies.
738
738
739 "diverge" is a mapping of source name -> list of destination names
739 "diverge" is a mapping of source name -> list of destination names
740 for divergent renames.
740 for divergent renames.
741
741
742 This function calls different copytracing algorithms based on config.
742 This function calls different copytracing algorithms based on config.
743 """
743 """
744 # avoid silly behavior for update from empty dir
744 # avoid silly behavior for update from empty dir
745 if not c1 or not c2 or c1 == c2:
745 if not c1 or not c2 or c1 == c2:
746 return branch_copies(), branch_copies(), {}
746 return branch_copies(), branch_copies(), {}
747
747
748 narrowmatch = c1.repo().narrowmatch()
748 narrowmatch = c1.repo().narrowmatch()
749
749
750 # avoid silly behavior for parent -> working dir
750 # avoid silly behavior for parent -> working dir
751 if c2.node() is None and c1.node() == repo.dirstate.p1():
751 if c2.node() is None and c1.node() == repo.dirstate.p1():
752 return (
752 return (
753 branch_copies(_dirstatecopies(repo, narrowmatch)),
753 branch_copies(_dirstatecopies(repo, narrowmatch)),
754 branch_copies(),
754 branch_copies(),
755 {},
755 {},
756 )
756 )
757
757
758 copytracing = repo.ui.config(b'experimental', b'copytrace')
758 copytracing = repo.ui.config(b'experimental', b'copytrace')
759 if stringutil.parsebool(copytracing) is False:
759 if stringutil.parsebool(copytracing) is False:
760 # stringutil.parsebool() returns None when it is unable to parse the
760 # stringutil.parsebool() returns None when it is unable to parse the
761 # value, so we should rely on making sure copytracing is on such cases
761 # value, so we should rely on making sure copytracing is on such cases
762 return branch_copies(), branch_copies(), {}
762 return branch_copies(), branch_copies(), {}
763
763
764 if usechangesetcentricalgo(repo):
764 if usechangesetcentricalgo(repo):
765 # The heuristics don't make sense when we need changeset-centric algos
765 # The heuristics don't make sense when we need changeset-centric algos
766 return _fullcopytracing(repo, c1, c2, base)
766 return _fullcopytracing(repo, c1, c2, base)
767
767
768 # Copy trace disabling is explicitly below the node == p1 logic above
768 # Copy trace disabling is explicitly below the node == p1 logic above
769 # because the logic above is required for a simple copy to be kept across a
769 # because the logic above is required for a simple copy to be kept across a
770 # rebase.
770 # rebase.
771 if copytracing == b'heuristics':
771 if copytracing == b'heuristics':
772 # Do full copytracing if only non-public revisions are involved as
772 # Do full copytracing if only non-public revisions are involved as
773 # that will be fast enough and will also cover the copies which could
773 # that will be fast enough and will also cover the copies which could
774 # be missed by heuristics
774 # be missed by heuristics
775 if _isfullcopytraceable(repo, c1, base):
775 if _isfullcopytraceable(repo, c1, base):
776 return _fullcopytracing(repo, c1, c2, base)
776 return _fullcopytracing(repo, c1, c2, base)
777 return _heuristicscopytracing(repo, c1, c2, base)
777 return _heuristicscopytracing(repo, c1, c2, base)
778 else:
778 else:
779 return _fullcopytracing(repo, c1, c2, base)
779 return _fullcopytracing(repo, c1, c2, base)
780
780
781
781
782 def _isfullcopytraceable(repo, c1, base):
782 def _isfullcopytraceable(repo, c1, base):
783 """Checks that if base, source and destination are all no-public branches,
783 """Checks that if base, source and destination are all no-public branches,
784 if yes let's use the full copytrace algorithm for increased capabilities
784 if yes let's use the full copytrace algorithm for increased capabilities
785 since it will be fast enough.
785 since it will be fast enough.
786
786
787 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
787 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
788 number of changesets from c1 to base such that if number of changesets are
788 number of changesets from c1 to base such that if number of changesets are
789 more than the limit, full copytracing algorithm won't be used.
789 more than the limit, full copytracing algorithm won't be used.
790 """
790 """
791 if c1.rev() is None:
791 if c1.rev() is None:
792 c1 = c1.p1()
792 c1 = c1.p1()
793 if c1.mutable() and base.mutable():
793 if c1.mutable() and base.mutable():
794 sourcecommitlimit = repo.ui.configint(
794 sourcecommitlimit = repo.ui.configint(
795 b'experimental', b'copytrace.sourcecommitlimit'
795 b'experimental', b'copytrace.sourcecommitlimit'
796 )
796 )
797 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
797 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
798 return commits < sourcecommitlimit
798 return commits < sourcecommitlimit
799 return False
799 return False
800
800
801
801
802 def _checksinglesidecopies(
802 def _checksinglesidecopies(
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
804 ):
804 ):
805 if src not in m2:
805 if src not in m2:
806 # deleted on side 2
806 # deleted on side 2
807 if src not in m1:
807 if src not in m1:
808 # renamed on side 1, deleted on side 2
808 # renamed on side 1, deleted on side 2
809 renamedelete[src] = dsts1
809 renamedelete[src] = dsts1
810 elif src not in mb:
810 elif src not in mb:
811 # Work around the "short-circuit to avoid issues with merge states"
811 # Work around the "short-circuit to avoid issues with merge states"
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
813 # destination doesn't exist in y.
813 # destination doesn't exist in y.
814 pass
814 pass
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
816 return
816 return
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
818 # modified on side 2
818 # modified on side 2
819 for dst in dsts1:
819 for dst in dsts1:
820 copy[dst] = src
820 copy[dst] = src
821
821
822
822
823 class branch_copies(object):
823 class branch_copies(object):
824 """Information about copies made on one side of a merge/graft.
824 """Information about copies made on one side of a merge/graft.
825
825
826 "copy" is a mapping from destination name -> source name,
826 "copy" is a mapping from destination name -> source name,
827 where source is in c1 and destination is in c2 or vice-versa.
827 where source is in c1 and destination is in c2 or vice-versa.
828
828
829 "movewithdir" is a mapping from source name -> destination name,
829 "movewithdir" is a mapping from source name -> destination name,
830 where the file at source present in one context but not the other
830 where the file at source present in one context but not the other
831 needs to be moved to destination by the merge process, because the
831 needs to be moved to destination by the merge process, because the
832 other context moved the directory it is in.
832 other context moved the directory it is in.
833
833
834 "renamedelete" is a mapping of source name -> list of destination
834 "renamedelete" is a mapping of source name -> list of destination
835 names for files deleted in c1 that were renamed in c2 or vice-versa.
835 names for files deleted in c1 that were renamed in c2 or vice-versa.
836
836
837 "dirmove" is a mapping of detected source dir -> destination dir renames.
837 "dirmove" is a mapping of detected source dir -> destination dir renames.
838 This is needed for handling changes to new files previously grafted into
838 This is needed for handling changes to new files previously grafted into
839 renamed directories.
839 renamed directories.
840 """
840 """
841
841
842 def __init__(
842 def __init__(
843 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
843 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
844 ):
844 ):
845 self.copy = {} if copy is None else copy
845 self.copy = {} if copy is None else copy
846 self.renamedelete = {} if renamedelete is None else renamedelete
846 self.renamedelete = {} if renamedelete is None else renamedelete
847 self.dirmove = {} if dirmove is None else dirmove
847 self.dirmove = {} if dirmove is None else dirmove
848 self.movewithdir = {} if movewithdir is None else movewithdir
848 self.movewithdir = {} if movewithdir is None else movewithdir
849
849
850 def __repr__(self):
850 def __repr__(self):
851 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
851 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
852 self.copy,
852 self.copy,
853 self.renamedelete,
853 self.renamedelete,
854 self.dirmove,
854 self.dirmove,
855 self.movewithdir,
855 self.movewithdir,
856 )
856 )
857
857
858
858
859 def _fullcopytracing(repo, c1, c2, base):
859 def _fullcopytracing(repo, c1, c2, base):
860 """The full copytracing algorithm which finds all the new files that were
860 """The full copytracing algorithm which finds all the new files that were
861 added from merge base up to the top commit and for each file it checks if
861 added from merge base up to the top commit and for each file it checks if
862 this file was copied from another file.
862 this file was copied from another file.
863
863
864 This is pretty slow when a lot of changesets are involved but will track all
864 This is pretty slow when a lot of changesets are involved but will track all
865 the copies.
865 the copies.
866 """
866 """
867 m1 = c1.manifest()
867 m1 = c1.manifest()
868 m2 = c2.manifest()
868 m2 = c2.manifest()
869 mb = base.manifest()
869 mb = base.manifest()
870
870
871 copies1 = pathcopies(base, c1)
871 copies1 = pathcopies(base, c1)
872 copies2 = pathcopies(base, c2)
872 copies2 = pathcopies(base, c2)
873
873
874 if not (copies1 or copies2):
874 if not (copies1 or copies2):
875 return branch_copies(), branch_copies(), {}
875 return branch_copies(), branch_copies(), {}
876
876
877 inversecopies1 = {}
877 inversecopies1 = {}
878 inversecopies2 = {}
878 inversecopies2 = {}
879 for dst, src in copies1.items():
879 for dst, src in copies1.items():
880 inversecopies1.setdefault(src, []).append(dst)
880 inversecopies1.setdefault(src, []).append(dst)
881 for dst, src in copies2.items():
881 for dst, src in copies2.items():
882 inversecopies2.setdefault(src, []).append(dst)
882 inversecopies2.setdefault(src, []).append(dst)
883
883
884 copy1 = {}
884 copy1 = {}
885 copy2 = {}
885 copy2 = {}
886 diverge = {}
886 diverge = {}
887 renamedelete1 = {}
887 renamedelete1 = {}
888 renamedelete2 = {}
888 renamedelete2 = {}
889 allsources = set(inversecopies1) | set(inversecopies2)
889 allsources = set(inversecopies1) | set(inversecopies2)
890 for src in allsources:
890 for src in allsources:
891 dsts1 = inversecopies1.get(src)
891 dsts1 = inversecopies1.get(src)
892 dsts2 = inversecopies2.get(src)
892 dsts2 = inversecopies2.get(src)
893 if dsts1 and dsts2:
893 if dsts1 and dsts2:
894 # copied/renamed on both sides
894 # copied/renamed on both sides
895 if src not in m1 and src not in m2:
895 if src not in m1 and src not in m2:
896 # renamed on both sides
896 # renamed on both sides
897 dsts1 = set(dsts1)
897 dsts1 = set(dsts1)
898 dsts2 = set(dsts2)
898 dsts2 = set(dsts2)
899 # If there's some overlap in the rename destinations, we
899 # If there's some overlap in the rename destinations, we
900 # consider it not divergent. For example, if side 1 copies 'a'
900 # consider it not divergent. For example, if side 1 copies 'a'
901 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
901 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
902 # and 'd' and deletes 'a'.
902 # and 'd' and deletes 'a'.
903 if dsts1 & dsts2:
903 if dsts1 & dsts2:
904 for dst in dsts1 & dsts2:
904 for dst in dsts1 & dsts2:
905 copy1[dst] = src
905 copy1[dst] = src
906 copy2[dst] = src
906 copy2[dst] = src
907 else:
907 else:
908 diverge[src] = sorted(dsts1 | dsts2)
908 diverge[src] = sorted(dsts1 | dsts2)
909 elif src in m1 and src in m2:
909 elif src in m1 and src in m2:
910 # copied on both sides
910 # copied on both sides
911 dsts1 = set(dsts1)
911 dsts1 = set(dsts1)
912 dsts2 = set(dsts2)
912 dsts2 = set(dsts2)
913 for dst in dsts1 & dsts2:
913 for dst in dsts1 & dsts2:
914 copy1[dst] = src
914 copy1[dst] = src
915 copy2[dst] = src
915 copy2[dst] = src
916 # TODO: Handle cases where it was renamed on one side and copied
916 # TODO: Handle cases where it was renamed on one side and copied
917 # on the other side
917 # on the other side
918 elif dsts1:
918 elif dsts1:
919 # copied/renamed only on side 1
919 # copied/renamed only on side 1
920 _checksinglesidecopies(
920 _checksinglesidecopies(
921 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
921 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
922 )
922 )
923 elif dsts2:
923 elif dsts2:
924 # copied/renamed only on side 2
924 # copied/renamed only on side 2
925 _checksinglesidecopies(
925 _checksinglesidecopies(
926 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
926 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
927 )
927 )
928
928
929 # find interesting file sets from manifests
929 # find interesting file sets from manifests
930 cache = []
930 cache = []
931
931
932 def _get_addedfiles(idx):
932 def _get_addedfiles(idx):
933 if not cache:
933 if not cache:
934 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
934 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
935 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
935 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
936 u1 = sorted(addedinm1 - addedinm2)
936 u1 = sorted(addedinm1 - addedinm2)
937 u2 = sorted(addedinm2 - addedinm1)
937 u2 = sorted(addedinm2 - addedinm1)
938 cache.extend((u1, u2))
938 cache.extend((u1, u2))
939 return cache[idx]
939 return cache[idx]
940
940
941 u1fn = lambda: _get_addedfiles(0)
941 u1fn = lambda: _get_addedfiles(0)
942 u2fn = lambda: _get_addedfiles(1)
942 u2fn = lambda: _get_addedfiles(1)
943 if repo.ui.debugflag:
943 if repo.ui.debugflag:
944 u1 = u1fn()
944 u1 = u1fn()
945 u2 = u2fn()
945 u2 = u2fn()
946
946
947 header = b" unmatched files in %s"
947 header = b" unmatched files in %s"
948 if u1:
948 if u1:
949 repo.ui.debug(
949 repo.ui.debug(
950 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
950 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
951 )
951 )
952 if u2:
952 if u2:
953 repo.ui.debug(
953 repo.ui.debug(
954 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
954 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
955 )
955 )
956
956
957 renamedeleteset = set()
957 renamedeleteset = set()
958 divergeset = set()
958 divergeset = set()
959 for dsts in diverge.values():
959 for dsts in diverge.values():
960 divergeset.update(dsts)
960 divergeset.update(dsts)
961 for dsts in renamedelete1.values():
961 for dsts in renamedelete1.values():
962 renamedeleteset.update(dsts)
962 renamedeleteset.update(dsts)
963 for dsts in renamedelete2.values():
963 for dsts in renamedelete2.values():
964 renamedeleteset.update(dsts)
964 renamedeleteset.update(dsts)
965
965
966 repo.ui.debug(
966 repo.ui.debug(
967 b" all copies found (* = to merge, ! = divergent, "
967 b" all copies found (* = to merge, ! = divergent, "
968 b"% = renamed and deleted):\n"
968 b"% = renamed and deleted):\n"
969 )
969 )
970 for side, copies in ((b"local", copies1), (b"remote", copies2)):
970 for side, copies in ((b"local", copies1), (b"remote", copies2)):
971 if not copies:
971 if not copies:
972 continue
972 continue
973 repo.ui.debug(b" on %s side:\n" % side)
973 repo.ui.debug(b" on %s side:\n" % side)
974 for f in sorted(copies):
974 for f in sorted(copies):
975 note = b""
975 note = b""
976 if f in copy1 or f in copy2:
976 if f in copy1 or f in copy2:
977 note += b"*"
977 note += b"*"
978 if f in divergeset:
978 if f in divergeset:
979 note += b"!"
979 note += b"!"
980 if f in renamedeleteset:
980 if f in renamedeleteset:
981 note += b"%"
981 note += b"%"
982 repo.ui.debug(
982 repo.ui.debug(
983 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
983 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
984 )
984 )
985 del renamedeleteset
985 del renamedeleteset
986 del divergeset
986 del divergeset
987
987
988 repo.ui.debug(b" checking for directory renames\n")
988 repo.ui.debug(b" checking for directory renames\n")
989
989
990 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
990 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
991 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
991 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
992
992
993 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
993 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
994 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
994 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
995
995
996 return branch_copies1, branch_copies2, diverge
996 return branch_copies1, branch_copies2, diverge
997
997
998
998
999 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
999 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1000 """Finds moved directories and files that should move with them.
1000 """Finds moved directories and files that should move with them.
1001
1001
1002 ctx: the context for one of the sides
1002 ctx: the context for one of the sides
1003 copy: files copied on the same side (as ctx)
1003 copy: files copied on the same side (as ctx)
1004 fullcopy: files copied on the same side (as ctx), including those that
1004 fullcopy: files copied on the same side (as ctx), including those that
1005 merge.manifestmerge() won't care about
1005 merge.manifestmerge() won't care about
1006 addedfilesfn: function returning added files on the other side (compared to
1006 addedfilesfn: function returning added files on the other side (compared to
1007 ctx)
1007 ctx)
1008 """
1008 """
1009 # generate a directory move map
1009 # generate a directory move map
1010 invalid = set()
1010 invalid = set()
1011 dirmove = {}
1011 dirmove = {}
1012
1012
1013 # examine each file copy for a potential directory move, which is
1013 # examine each file copy for a potential directory move, which is
1014 # when all the files in a directory are moved to a new directory
1014 # when all the files in a directory are moved to a new directory
1015 for dst, src in pycompat.iteritems(fullcopy):
1015 for dst, src in pycompat.iteritems(fullcopy):
1016 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1016 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1017 if dsrc in invalid:
1017 if dsrc in invalid:
1018 # already seen to be uninteresting
1018 # already seen to be uninteresting
1019 continue
1019 continue
1020 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1020 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1021 # directory wasn't entirely moved locally
1021 # directory wasn't entirely moved locally
1022 invalid.add(dsrc)
1022 invalid.add(dsrc)
1023 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1023 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1024 # files from the same directory moved to two different places
1024 # files from the same directory moved to two different places
1025 invalid.add(dsrc)
1025 invalid.add(dsrc)
1026 else:
1026 else:
1027 # looks good so far
1027 # looks good so far
1028 dirmove[dsrc] = ddst
1028 dirmove[dsrc] = ddst
1029
1029
1030 for i in invalid:
1030 for i in invalid:
1031 if i in dirmove:
1031 if i in dirmove:
1032 del dirmove[i]
1032 del dirmove[i]
1033 del invalid
1033 del invalid
1034
1034
1035 if not dirmove:
1035 if not dirmove:
1036 return {}, {}
1036 return {}, {}
1037
1037
1038 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1038 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1039
1039
1040 for d in dirmove:
1040 for d in dirmove:
1041 repo.ui.debug(
1041 repo.ui.debug(
1042 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1042 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1043 )
1043 )
1044
1044
1045 movewithdir = {}
1045 movewithdir = {}
1046 # check unaccounted nonoverlapping files against directory moves
1046 # check unaccounted nonoverlapping files against directory moves
1047 for f in addedfilesfn():
1047 for f in addedfilesfn():
1048 if f not in fullcopy:
1048 if f not in fullcopy:
1049 for d in dirmove:
1049 for d in dirmove:
1050 if f.startswith(d):
1050 if f.startswith(d):
1051 # new file added in a directory that was moved, move it
1051 # new file added in a directory that was moved, move it
1052 df = dirmove[d] + f[len(d) :]
1052 df = dirmove[d] + f[len(d) :]
1053 if df not in copy:
1053 if df not in copy:
1054 movewithdir[f] = df
1054 movewithdir[f] = df
1055 repo.ui.debug(
1055 repo.ui.debug(
1056 b" pending file src: '%s' -> dst: '%s'\n"
1056 b" pending file src: '%s' -> dst: '%s'\n"
1057 % (f, df)
1057 % (f, df)
1058 )
1058 )
1059 break
1059 break
1060
1060
1061 return dirmove, movewithdir
1061 return dirmove, movewithdir
1062
1062
1063
1063
1064 def _heuristicscopytracing(repo, c1, c2, base):
1064 def _heuristicscopytracing(repo, c1, c2, base):
1065 """Fast copytracing using filename heuristics
1065 """Fast copytracing using filename heuristics
1066
1066
1067 Assumes that moves or renames are of following two types:
1067 Assumes that moves or renames are of following two types:
1068
1068
1069 1) Inside a directory only (same directory name but different filenames)
1069 1) Inside a directory only (same directory name but different filenames)
1070 2) Move from one directory to another
1070 2) Move from one directory to another
1071 (same filenames but different directory names)
1071 (same filenames but different directory names)
1072
1072
1073 Works only when there are no merge commits in the "source branch".
1073 Works only when there are no merge commits in the "source branch".
1074 Source branch is commits from base up to c2 not including base.
1074 Source branch is commits from base up to c2 not including base.
1075
1075
1076 If merge is involved it fallbacks to _fullcopytracing().
1076 If merge is involved it fallbacks to _fullcopytracing().
1077
1077
1078 Can be used by setting the following config:
1078 Can be used by setting the following config:
1079
1079
1080 [experimental]
1080 [experimental]
1081 copytrace = heuristics
1081 copytrace = heuristics
1082
1082
1083 In some cases the copy/move candidates found by heuristics can be very large
1083 In some cases the copy/move candidates found by heuristics can be very large
1084 in number and that will make the algorithm slow. The number of possible
1084 in number and that will make the algorithm slow. The number of possible
1085 candidates to check can be limited by using the config
1085 candidates to check can be limited by using the config
1086 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1086 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1087 """
1087 """
1088
1088
1089 if c1.rev() is None:
1089 if c1.rev() is None:
1090 c1 = c1.p1()
1090 c1 = c1.p1()
1091 if c2.rev() is None:
1091 if c2.rev() is None:
1092 c2 = c2.p1()
1092 c2 = c2.p1()
1093
1093
1094 changedfiles = set()
1094 changedfiles = set()
1095 m1 = c1.manifest()
1095 m1 = c1.manifest()
1096 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1096 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1097 # If base is not in c2 branch, we switch to fullcopytracing
1097 # If base is not in c2 branch, we switch to fullcopytracing
1098 repo.ui.debug(
1098 repo.ui.debug(
1099 b"switching to full copytracing as base is not "
1099 b"switching to full copytracing as base is not "
1100 b"an ancestor of c2\n"
1100 b"an ancestor of c2\n"
1101 )
1101 )
1102 return _fullcopytracing(repo, c1, c2, base)
1102 return _fullcopytracing(repo, c1, c2, base)
1103
1103
1104 ctx = c2
1104 ctx = c2
1105 while ctx != base:
1105 while ctx != base:
1106 if len(ctx.parents()) == 2:
1106 if len(ctx.parents()) == 2:
1107 # To keep things simple let's not handle merges
1107 # To keep things simple let's not handle merges
1108 repo.ui.debug(b"switching to full copytracing because of merges\n")
1108 repo.ui.debug(b"switching to full copytracing because of merges\n")
1109 return _fullcopytracing(repo, c1, c2, base)
1109 return _fullcopytracing(repo, c1, c2, base)
1110 changedfiles.update(ctx.files())
1110 changedfiles.update(ctx.files())
1111 ctx = ctx.p1()
1111 ctx = ctx.p1()
1112
1112
1113 copies2 = {}
1113 copies2 = {}
1114 cp = _forwardcopies(base, c2)
1114 cp = _forwardcopies(base, c2)
1115 for dst, src in pycompat.iteritems(cp):
1115 for dst, src in pycompat.iteritems(cp):
1116 if src in m1:
1116 if src in m1:
1117 copies2[dst] = src
1117 copies2[dst] = src
1118
1118
1119 # file is missing if it isn't present in the destination, but is present in
1119 # file is missing if it isn't present in the destination, but is present in
1120 # the base and present in the source.
1120 # the base and present in the source.
1121 # Presence in the base is important to exclude added files, presence in the
1121 # Presence in the base is important to exclude added files, presence in the
1122 # source is important to exclude removed files.
1122 # source is important to exclude removed files.
1123 filt = lambda f: f not in m1 and f in base and f in c2
1123 filt = lambda f: f not in m1 and f in base and f in c2
1124 missingfiles = [f for f in changedfiles if filt(f)]
1124 missingfiles = [f for f in changedfiles if filt(f)]
1125
1125
1126 copies1 = {}
1126 copies1 = {}
1127 if missingfiles:
1127 if missingfiles:
1128 basenametofilename = collections.defaultdict(list)
1128 basenametofilename = collections.defaultdict(list)
1129 dirnametofilename = collections.defaultdict(list)
1129 dirnametofilename = collections.defaultdict(list)
1130
1130
1131 for f in m1.filesnotin(base.manifest()):
1131 for f in m1.filesnotin(base.manifest()):
1132 basename = os.path.basename(f)
1132 basename = os.path.basename(f)
1133 dirname = os.path.dirname(f)
1133 dirname = os.path.dirname(f)
1134 basenametofilename[basename].append(f)
1134 basenametofilename[basename].append(f)
1135 dirnametofilename[dirname].append(f)
1135 dirnametofilename[dirname].append(f)
1136
1136
1137 for f in missingfiles:
1137 for f in missingfiles:
1138 basename = os.path.basename(f)
1138 basename = os.path.basename(f)
1139 dirname = os.path.dirname(f)
1139 dirname = os.path.dirname(f)
1140 samebasename = basenametofilename[basename]
1140 samebasename = basenametofilename[basename]
1141 samedirname = dirnametofilename[dirname]
1141 samedirname = dirnametofilename[dirname]
1142 movecandidates = samebasename + samedirname
1142 movecandidates = samebasename + samedirname
1143 # f is guaranteed to be present in c2, that's why
1143 # f is guaranteed to be present in c2, that's why
1144 # c2.filectx(f) won't fail
1144 # c2.filectx(f) won't fail
1145 f2 = c2.filectx(f)
1145 f2 = c2.filectx(f)
1146 # we can have a lot of candidates which can slow down the heuristics
1146 # we can have a lot of candidates which can slow down the heuristics
1147 # config value to limit the number of candidates moves to check
1147 # config value to limit the number of candidates moves to check
1148 maxcandidates = repo.ui.configint(
1148 maxcandidates = repo.ui.configint(
1149 b'experimental', b'copytrace.movecandidateslimit'
1149 b'experimental', b'copytrace.movecandidateslimit'
1150 )
1150 )
1151
1151
1152 if len(movecandidates) > maxcandidates:
1152 if len(movecandidates) > maxcandidates:
1153 repo.ui.status(
1153 repo.ui.status(
1154 _(
1154 _(
1155 b"skipping copytracing for '%s', more "
1155 b"skipping copytracing for '%s', more "
1156 b"candidates than the limit: %d\n"
1156 b"candidates than the limit: %d\n"
1157 )
1157 )
1158 % (f, len(movecandidates))
1158 % (f, len(movecandidates))
1159 )
1159 )
1160 continue
1160 continue
1161
1161
1162 for candidate in movecandidates:
1162 for candidate in movecandidates:
1163 f1 = c1.filectx(candidate)
1163 f1 = c1.filectx(candidate)
1164 if _related(f1, f2):
1164 if _related(f1, f2):
1165 # if there are a few related copies then we'll merge
1165 # if there are a few related copies then we'll merge
1166 # changes into all of them. This matches the behaviour
1166 # changes into all of them. This matches the behaviour
1167 # of upstream copytracing
1167 # of upstream copytracing
1168 copies1[candidate] = f
1168 copies1[candidate] = f
1169
1169
1170 return branch_copies(copies1), branch_copies(copies2), {}
1170 return branch_copies(copies1), branch_copies(copies2), {}
1171
1171
1172
1172
1173 def _related(f1, f2):
1173 def _related(f1, f2):
1174 """return True if f1 and f2 filectx have a common ancestor
1174 """return True if f1 and f2 filectx have a common ancestor
1175
1175
1176 Walk back to common ancestor to see if the two files originate
1176 Walk back to common ancestor to see if the two files originate
1177 from the same file. Since workingfilectx's rev() is None it messes
1177 from the same file. Since workingfilectx's rev() is None it messes
1178 up the integer comparison logic, hence the pre-step check for
1178 up the integer comparison logic, hence the pre-step check for
1179 None (f1 and f2 can only be workingfilectx's initially).
1179 None (f1 and f2 can only be workingfilectx's initially).
1180 """
1180 """
1181
1181
1182 if f1 == f2:
1182 if f1 == f2:
1183 return True # a match
1183 return True # a match
1184
1184
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1186 try:
1186 try:
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1188
1188
1189 if f1r is None:
1189 if f1r is None:
1190 f1 = next(g1)
1190 f1 = next(g1)
1191 if f2r is None:
1191 if f2r is None:
1192 f2 = next(g2)
1192 f2 = next(g2)
1193
1193
1194 while True:
1194 while True:
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1196 if f1r > f2r:
1196 if f1r > f2r:
1197 f1 = next(g1)
1197 f1 = next(g1)
1198 elif f2r > f1r:
1198 elif f2r > f1r:
1199 f2 = next(g2)
1199 f2 = next(g2)
1200 else: # f1 and f2 point to files in the same linkrev
1200 else: # f1 and f2 point to files in the same linkrev
1201 return f1 == f2 # true if they point to the same file
1201 return f1 == f2 # true if they point to the same file
1202 except StopIteration:
1202 except StopIteration:
1203 return False
1203 return False
1204
1204
1205
1205
1206 def graftcopies(wctx, ctx, base):
1206 def graftcopies(wctx, ctx, base):
1207 """reproduce copies between base and ctx in the wctx
1207 """reproduce copies between base and ctx in the wctx
1208
1208
1209 Unlike mergecopies(), this function will only consider copies between base
1209 Unlike mergecopies(), this function will only consider copies between base
1210 and ctx; it will ignore copies between base and wctx. Also unlike
1210 and ctx; it will ignore copies between base and wctx. Also unlike
1211 mergecopies(), this function will apply copies to the working copy (instead
1211 mergecopies(), this function will apply copies to the working copy (instead
1212 of just returning information about the copies). That makes it cheaper
1212 of just returning information about the copies). That makes it cheaper
1213 (especially in the common case of base==ctx.p1()) and useful also when
1213 (especially in the common case of base==ctx.p1()) and useful also when
1214 experimental.copytrace=off.
1214 experimental.copytrace=off.
1215
1215
1216 merge.update() will have already marked most copies, but it will only
1216 merge.update() will have already marked most copies, but it will only
1217 mark copies if it thinks the source files are related (see
1217 mark copies if it thinks the source files are related (see
1218 merge._related()). It will also not mark copies if the file wasn't modified
1218 merge._related()). It will also not mark copies if the file wasn't modified
1219 on the local side. This function adds the copies that were "missed"
1219 on the local side. This function adds the copies that were "missed"
1220 by merge.update().
1220 by merge.update().
1221 """
1221 """
1222 new_copies = pathcopies(base, ctx)
1222 new_copies = pathcopies(base, ctx)
1223 parent = wctx.p1()
1223 parent = wctx.p1()
1224 _filter(parent, wctx, new_copies)
1224 _filter(parent, wctx, new_copies)
1225 # extra filtering to drop copy information for files that existed before
1225 # extra filtering to drop copy information for files that existed before
1226 # the graft (otherwise we would create merge filelog for non-merge commit
1226 # the graft (otherwise we would create merge filelog for non-merge commit
1227 for dest, __ in list(new_copies.items()):
1227 for dest, __ in list(new_copies.items()):
1228 if dest in parent:
1228 if dest in parent:
1229 del new_copies[dest]
1229 del new_copies[dest]
1230 for dst, src in pycompat.iteritems(new_copies):
1230 for dst, src in pycompat.iteritems(new_copies):
1231 wctx[dst].markcopied(src)
1231 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now