##// END OF EJS Templates
copies: extract value comparison in the python copy tracing...
marmoute -
r46776:3a0c4133 default
parent child Browse files
Show More
@@ -1,1217 +1,1224 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # Start from the older mapping and rewrite each newer copy through it:
    # when `suffix` records dest<-src and `prefix` already traced src<-orig,
    # the chained result is dest<-orig.
    chained = prefix.copy()
    for dest, src in pycompat.iteritems(suffix):
        chained[dest] = prefix.get(src, src)
    return chained
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no need to check config
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    # both of these sources read copy data from the changeset itself
    return readfrom in (b'changeset-only', b'compatibility')
123
123
124
124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # We only look for where new files came from; finding where old files
    # went is too expensive, so a case like 'hg rm b; hg cp a b' is missed.
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # a single ancestry context is shared by every traced file context
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            copies[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies
182
182
183
183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or raw sidedata in the rust case
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the rust code consumes the raw sidedata bytes directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            raw = None
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            return (p1, p2, raw)

    else:
        # pure python path: hand back the parsed changes object

        def revinfo(rev):
            p1, p2 = parents(rev)
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            return (p1, p2, changes)

    return revinfo
220
220
221
221
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # a strictly higher revision can never be an ancestor
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        try:
            return cache[key]
        except KeyError:
            result = cache[key] = is_ancestor(anc, desc)
            return result

    return _is_ancestor
238
238
239
239
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track renames from "A" to "B" we need every parent → child edge
    # contained in `::B` but not in `::A`.
    #
    # For that we gather all revisions exclusive¹ to "B" (ie¹: `::b - ::a`)
    # plus all the "root points", ie the parents of the exclusive set that
    # belong to `::a`. These are exactly the revisions needed to express the
    # parent → child edges we need to combine.
    #
    # [1] strictly speaking, we need all the edges within `(::a)::b`, ie
    # excluding paths that lead to roots that are not ancestors of `a`. That
    # special case is handled by the `has_graph_roots` filtering below.

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for rev in revs:
        ps = parents(rev)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps
            # record the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # This deals with the special case mentioned in the [1] footnote: we
        # must filter out revisions that lead to non-common graph roots.
        roots = list(roots)
        minroot = min(roots)
        heads = [b.rev()]
        reachable = cl.reachableroots(minroot, heads, roots, includepath=True)
        reachable = set(reachable)
        revs = [r for r in revs if r in reachable]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = {root: 0 for root in roots}
        for rev in revs:
            for p in cl.parentrevs(rev):
                if p == nullrev:
                    continue
                children_count[rev] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = {root: [] for root in roots}
        for rev in revs:
            for p in cl.parentrevs(rev):
                if p == nullrev:
                    continue
                children[rev] = []
                if p in children:
                    children[p].append(rev)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339
339
340
340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole traversal to the rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent not in the mapping: this edge is irrelevant
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    # other children still need the parent data; keep a
                    # shared reference and copy-on-write below
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of that parent: its data can be dropped
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    copies = {}

                newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        if newcopies is copies:
                            newcopies = copies.copy()
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                elif current_copies is newcopies:
                    # nothing to merge:
                    pass
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source} mapping
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        # drop entries the caller's matcher is not interested in
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
447
447
448
448
# constant to decide which side to pick with _merge_copies_dict
# (returned by _compare_values): keep the "minor" side's value, take the
# "major" side's value, or treat both as equivalent.
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
453
454
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked (the actual
    arbitration between conflicting values lives in `_compare_values`).

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # only known on the "major" side: nothing to arbitrate
            minor[dest] = major_value
        else:
            pick = _compare_values(
                changes, isancestor, dest, minor_value, major_value
            )
            if pick == PICK_MAJOR:
                minor[dest] = major_value
    return minor
495
477
496
478
def _compare_values(changes, isancestor, dest, other, value):
    """compare two value within a _merge_copies_dict loop iteration

    - `changes`: the ChangingFiles object for the current revision (may be
      None),
    - `isancestor(low_rev, high_rev)`: ancestry predicate,
    - `dest`: the copy destination path being arbitrated,
    - `other`: the (rev, source) entry from the "minor" side,
    - `value`: the (rev, source) entry from the "major" side.

    Returns one of PICK_MINOR, PICK_MAJOR or PICK_EITHER.
    """
    # [0] is the revision the entry was recorded at, [1] the copy source
    # (None when the entry records a removal)
    new_tt = value[0]
    other_tt = other[0]

    if value[1] == other[1]:
        # same copy source on both sides: the choice does not matter
        return PICK_EITHER
    # content from "major" wins, unless it is older
    # than the branch point or there is a merge
    if new_tt == other_tt:
        return PICK_MAJOR
    elif changes is not None and value[1] is None and dest in changes.salvaged:
        # major side removed the file but it was salvaged: keep minor
        return PICK_MINOR
    elif changes is not None and other[1] is None and dest in changes.salvaged:
        # minor side removed the file but it was salvaged: keep major
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(new_tt, other_tt):
        if value[1] is not None:
            return PICK_MAJOR
    elif isancestor(other_tt, new_tt):
        return PICK_MAJOR
    return PICK_MINOR
502
503
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build a per-revision "was this path merged here?" predicate
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            filelog_parents = fctx._filelog.parents(fctx._filenode)
            # merged means the file revision has at least two non-null parents
            real_parents = sum(1 for n in filelog_parents if n != nullid)
            return real_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
534
541
535
542
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks ``revs`` (assumed sorted so parents come before children — TODO
    confirm against caller) and propagates per-file copy records of the form
    {dest: (rev, source)} down to ``targetrev``.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copies recorded against the parent we are coming from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                # restrict to files selected by the matcher
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier rename: if `source` was itself
                    # copied from somewhere, record the ultimate origin
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # drop entries whose source was invalidated (removed along the way)
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
604
611
605
612
606 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
613 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
607 """version of `_merge_copies_dict` that works with the Google
614 """version of `_merge_copies_dict` that works with the Google
608 specific "extra" based storage for copy information"""
615 specific "extra" based storage for copy information"""
609 for dest, value in major.items():
616 for dest, value in major.items():
610 other = minor.get(dest)
617 other = minor.get(dest)
611 if other is None:
618 if other is None:
612 minor[dest] = value
619 minor[dest] = value
613 else:
620 else:
614 new_tt = value[0]
621 new_tt = value[0]
615 other_tt = other[0]
622 other_tt = other[0]
616 if value[1] == other[1]:
623 if value[1] == other[1]:
617 continue
624 continue
618 # content from "major" wins, unless it is older
625 # content from "major" wins, unless it is older
619 # than the branch point or there is a merge
626 # than the branch point or there is a merge
620 if (
627 if (
621 new_tt == other_tt
628 new_tt == other_tt
622 or not isancestor(new_tt, other_tt)
629 or not isancestor(new_tt, other_tt)
623 or ismerged(dest)
630 or ismerged(dest)
624 ):
631 ):
625 minor[dest] = value
632 minor[dest] = value
626
633
627
634
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, base, match)
    # working copy: trace up to the first parent, then layer the
    # dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
642
649
643
650
def _backwardrenames(a, b, match):
    """find renames (not copies) going from ``b`` back to ``a``"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dest, source in sorted(pycompat.iteritems(forward)):
        if match and not match(source):
            continue
        # remove copies
        if source in a:
            continue
        reversed_renames[source] = dest
    return reversed_renames
664
671
665
672
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical or empty endpoints have no copies
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # prune copies that are invalid after chaining (see _filter)
    _filter(x, y, copies)
    return copies
702
709
703
710
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
772
779
773
780
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public (immutable) changeset is involved: stick to heuristics
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < limit
792
799
793
800
794 def _checksinglesidecopies(
801 def _checksinglesidecopies(
795 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
802 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
796 ):
803 ):
797 if src not in m2:
804 if src not in m2:
798 # deleted on side 2
805 # deleted on side 2
799 if src not in m1:
806 if src not in m1:
800 # renamed on side 1, deleted on side 2
807 # renamed on side 1, deleted on side 2
801 renamedelete[src] = dsts1
808 renamedelete[src] = dsts1
802 elif src not in mb:
809 elif src not in mb:
803 # Work around the "short-circuit to avoid issues with merge states"
810 # Work around the "short-circuit to avoid issues with merge states"
804 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
811 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
805 # destination doesn't exist in y.
812 # destination doesn't exist in y.
806 pass
813 pass
807 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
814 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
808 return
815 return
809 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
816 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
810 # modified on side 2
817 # modified on side 2
811 for dst in dsts1:
818 for dst in dsts1:
812 copy[dst] = src
819 copy[dst] = src
813
820
814
821
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each mapping defaults to an independent fresh dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
849
856
850
857
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side {dst: src} copy mappings from the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert to {src: [dst, ...]} so each source can be classified once
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        # lazily compute (and memoize) the per-side lists of added files;
        # idx 0 -> added only in m1, idx 1 -> added only in m2
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
989
996
990
997
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Find moved directories and the files that should move along with them.

    ctx: the context for one of the merge sides
    copy: files copied on that same side
    fullcopy: files copied on that same side, including copies that
        merge.manifestmerge() won't care about
    addedfilesfn: callable returning the files added on the *other* side

    Returns a pair ``(dirmove, movewithdir)`` where ``dirmove`` maps old
    directory prefixes (with trailing b"/") to new ones, and ``movewithdir``
    maps newly-added files to their destination inside a moved directory.
    """
    # Build a candidate directory-move map: a directory counts as moved
    # only when *all* of its files were copied to the same new directory.
    disqualified = set()
    dir_map = {}

    for dst, src in pycompat.iteritems(fullcopy):
        src_dir = pathutil.dirname(src)
        dst_dir = pathutil.dirname(dst)
        if src_dir in disqualified:
            # already known to be uninteresting
            continue
        if ctx.hasdir(src_dir) and ctx.hasdir(dst_dir):
            # directory wasn't entirely moved locally
            disqualified.add(src_dir)
        elif dir_map.get(src_dir, dst_dir) != dst_dir:
            # files from the same directory moved to two different places
            disqualified.add(src_dir)
        else:
            # looks good so far
            dir_map[src_dir] = dst_dir

    for bad in disqualified:
        dir_map.pop(bad, None)
    del disqualified

    if not dir_map:
        return {}, {}

    # Normalize keys/values to directory prefixes for startswith() matching.
    dir_map = {
        src_dir + b"/": dst_dir + b"/"
        for src_dir, dst_dir in pycompat.iteritems(dir_map)
    }

    for moved in dir_map:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (moved, dir_map[moved])
        )

    # Check unaccounted, non-overlapping added files against directory moves.
    follow = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for moved in dir_map:
            if not f.startswith(moved):
                continue
            # new file added in a directory that was moved, move it too
            target = dir_map[moved] + f[len(moved) :]
            if target not in copy:
                follow[f] = target
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, target)
                )
            break

    return dir_map, follow
1054
1061
1055
1062
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes renames fall into one of two shapes:

    1) within a single directory (same directory, different basename)
    2) across directories (same basename, different directory)

    Only works when the "source branch" (commits from base up to c2,
    excluding base) contains no merge commits; otherwise it falls back
    to _fullcopytracing().

    Enabled with:

        [experimental]
        copytrace = heuristics

    The heuristic candidate set can be large and slow to check; it is
    capped by `experimental.copytrace.movecandidateslimit` (default 100).
    """
    # Working contexts have rev() == None; operate on their first parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()

    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # base is not an ancestor of c2: heuristics don't apply
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Collect every file touched between base and c2, bailing on merges.
    changedfiles = set()
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(cursor.files())
        cursor = cursor.p1()

    # Copies on the c2 side whose source still exists on the c1 side.
    copies2 = {
        dst: src
        for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
        if src in m1
    }

    # A file is "missing" if it is absent from the destination (m1) but
    # present in both base (excludes added files) and c2 (excludes
    # removed files).
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index the files added on the c1 side by basename and dirname so
        # each missing file can be matched against both rename shapes.
        by_basename = collections.defaultdict(list)
        by_dirname = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            by_basename[os.path.basename(f)].append(f)
            by_dirname[os.path.dirname(f)].append(f)

        for f in missingfiles:
            candidates = (
                by_basename[os.path.basename(f)]
                + by_dirname[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # too many candidates slows the heuristics down; the limit is
            # configurable
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1163
1170
1164
1171
1165 def _related(f1, f2):
1172 def _related(f1, f2):
1166 """return True if f1 and f2 filectx have a common ancestor
1173 """return True if f1 and f2 filectx have a common ancestor
1167
1174
1168 Walk back to common ancestor to see if the two files originate
1175 Walk back to common ancestor to see if the two files originate
1169 from the same file. Since workingfilectx's rev() is None it messes
1176 from the same file. Since workingfilectx's rev() is None it messes
1170 up the integer comparison logic, hence the pre-step check for
1177 up the integer comparison logic, hence the pre-step check for
1171 None (f1 and f2 can only be workingfilectx's initially).
1178 None (f1 and f2 can only be workingfilectx's initially).
1172 """
1179 """
1173
1180
1174 if f1 == f2:
1181 if f1 == f2:
1175 return True # a match
1182 return True # a match
1176
1183
1177 g1, g2 = f1.ancestors(), f2.ancestors()
1184 g1, g2 = f1.ancestors(), f2.ancestors()
1178 try:
1185 try:
1179 f1r, f2r = f1.linkrev(), f2.linkrev()
1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1180
1187
1181 if f1r is None:
1188 if f1r is None:
1182 f1 = next(g1)
1189 f1 = next(g1)
1183 if f2r is None:
1190 if f2r is None:
1184 f2 = next(g2)
1191 f2 = next(g2)
1185
1192
1186 while True:
1193 while True:
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1188 if f1r > f2r:
1195 if f1r > f2r:
1189 f1 = next(g1)
1196 f1 = next(g1)
1190 elif f2r > f1r:
1197 elif f2r > f1r:
1191 f2 = next(g2)
1198 f2 = next(g2)
1192 else: # f1 and f2 point to files in the same linkrev
1199 else: # f1 and f2 point to files in the same linkrev
1193 return f1 == f2 # true if they point to the same file
1200 return f1 == f2 # true if they point to the same file
1194 except StopIteration:
1201 except StopIteration:
1195 return False
1202 return False
1196
1203
1197
1204
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), only copies between base and ctx are
    considered; copies between base and wctx are ignored. Also unlike
    mergecopies(), the copies are applied directly to the working copy
    rather than returned, which is cheaper (especially when
    base == ctx.p1()) and works even with experimental.copytrace=off.

    merge.update() already marks most copies, but only when it thinks
    the source files are related (see merge._related()) and only when
    the file was modified on the local side; this function records the
    copies merge.update() "missed".
    """
    # copies recorded on the grafted side
    graft_copies = pathcopies(base, ctx)
    # drop entries that are invalid relative to the working copy parent
    _filter(wctx.p1(), wctx, graft_copies)
    for dest, source in pycompat.iteritems(graft_copies):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now