##// END OF EJS Templates
copies: avoid early return in _combine_changeset_copies...
marmoute -
r46773:1fcfff09 default
parent child Browse files
Show More
@@ -1,1223 +1,1221 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
73 def _chain(prefix, suffix):
73 def _chain(prefix, suffix):
74 """chain two sets of copies 'prefix' and 'suffix'"""
74 """chain two sets of copies 'prefix' and 'suffix'"""
75 result = prefix.copy()
75 result = prefix.copy()
76 for key, value in pycompat.iteritems(suffix):
76 for key, value in pycompat.iteritems(suffix):
77 result[key] = prefix.get(value, value)
77 result[key] = prefix.get(value, value)
78 return result
78 return result
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
116 def usechangesetcentricalgo(repo):
116 def usechangesetcentricalgo(repo):
117 """Checks if we should use changeset-centric copy algorithms"""
117 """Checks if we should use changeset-centric copy algorithms"""
118 if repo.filecopiesmode == b'changeset-sidedata':
118 if repo.filecopiesmode == b'changeset-sidedata':
119 return True
119 return True
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 changesetsource = (b'changeset-only', b'compatibility')
121 changesetsource = (b'changeset-only', b'compatibility')
122 return readfrom in changesetsource
122 return readfrom in changesetsource
123
123
124
124
125 def _committedforwardcopies(a, b, base, match):
125 def _committedforwardcopies(a, b, base, match):
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 # files might have to be traced back to the fctx parent of the last
127 # files might have to be traced back to the fctx parent of the last
128 # one-side-only changeset, but not further back than that
128 # one-side-only changeset, but not further back than that
129 repo = a._repo
129 repo = a._repo
130
130
131 if usechangesetcentricalgo(repo):
131 if usechangesetcentricalgo(repo):
132 return _changesetforwardcopies(a, b, match)
132 return _changesetforwardcopies(a, b, match)
133
133
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 dbg = repo.ui.debug
135 dbg = repo.ui.debug
136 if debug:
136 if debug:
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 am = a.manifest()
138 am = a.manifest()
139 basemf = None if base is None else base.manifest()
139 basemf = None if base is None else base.manifest()
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145
145
146 # Computing the forward missing is quite expensive on large manifests, since
146 # Computing the forward missing is quite expensive on large manifests, since
147 # it compares the entire manifests. We can optimize it in the common use
147 # it compares the entire manifests. We can optimize it in the common use
148 # case of computing what copies are in a commit versus its parent (like
148 # case of computing what copies are in a commit versus its parent (like
149 # during a rebase or histedit). Note, we exclude merge commits from this
149 # during a rebase or histedit). Note, we exclude merge commits from this
150 # optimization, since the ctx.files() for a merge commit is not correct for
150 # optimization, since the ctx.files() for a merge commit is not correct for
151 # this comparison.
151 # this comparison.
152 forwardmissingmatch = match
152 forwardmissingmatch = match
153 if b.p1() == a and b.p2().node() == nullid:
153 if b.p1() == a and b.p2().node() == nullid:
154 filesmatcher = matchmod.exact(b.files())
154 filesmatcher = matchmod.exact(b.files())
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157
157
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159
159
160 if debug:
160 if debug:
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162
162
163 for f in sorted(missing):
163 for f in sorted(missing):
164 if debug:
164 if debug:
165 dbg(b'debug.copies: tracing file: %s\n' % f)
165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 fctx = b[f]
166 fctx = b[f]
167 fctx._ancestrycontext = ancestrycontext
167 fctx._ancestrycontext = ancestrycontext
168
168
169 if debug:
169 if debug:
170 start = util.timer()
170 start = util.timer()
171 opath = _tracefile(fctx, am, basemf)
171 opath = _tracefile(fctx, am, basemf)
172 if opath:
172 if opath:
173 if debug:
173 if debug:
174 dbg(b'debug.copies: rename of: %s\n' % opath)
174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 cm[f] = opath
175 cm[f] = opath
176 if debug:
176 if debug:
177 dbg(
177 dbg(
178 b'debug.copies: time: %f seconds\n'
178 b'debug.copies: time: %f seconds\n'
179 % (util.timer() - start)
179 % (util.timer() - start)
180 )
180 )
181 return cm
181 return cm
182
182
183
183
184 def _revinfo_getter(repo, match):
184 def _revinfo_getter(repo, match):
185 """returns a function that returns the following data given a <rev>"
185 """returns a function that returns the following data given a <rev>"
186
186
187 * p1: revision number of first parent
187 * p1: revision number of first parent
188 * p2: revision number of first parent
188 * p2: revision number of first parent
189 * changes: a ChangingFiles object
189 * changes: a ChangingFiles object
190 """
190 """
191 cl = repo.changelog
191 cl = repo.changelog
192 parents = cl.parentrevs
192 parents = cl.parentrevs
193 flags = cl.flags
193 flags = cl.flags
194
194
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196
196
197 changelogrevision = cl.changelogrevision
197 changelogrevision = cl.changelogrevision
198
198
199 alwaysmatch = match.always()
199 alwaysmatch = match.always()
200
200
201 if rustmod is not None and alwaysmatch:
201 if rustmod is not None and alwaysmatch:
202
202
203 def revinfo(rev):
203 def revinfo(rev):
204 p1, p2 = parents(rev)
204 p1, p2 = parents(rev)
205 if flags(rev) & HASCOPIESINFO:
205 if flags(rev) & HASCOPIESINFO:
206 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
206 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
207 else:
207 else:
208 raw = None
208 raw = None
209 return (p1, p2, raw)
209 return (p1, p2, raw)
210
210
211 else:
211 else:
212
212
213 def revinfo(rev):
213 def revinfo(rev):
214 p1, p2 = parents(rev)
214 p1, p2 = parents(rev)
215 if flags(rev) & HASCOPIESINFO:
215 if flags(rev) & HASCOPIESINFO:
216 changes = changelogrevision(rev).changes
216 changes = changelogrevision(rev).changes
217 else:
217 else:
218 changes = None
218 changes = None
219 return (p1, p2, changes)
219 return (p1, p2, changes)
220
220
221 return revinfo
221 return revinfo
222
222
223
223
224 def cached_is_ancestor(is_ancestor):
224 def cached_is_ancestor(is_ancestor):
225 """return a cached version of is_ancestor"""
225 """return a cached version of is_ancestor"""
226 cache = {}
226 cache = {}
227
227
228 def _is_ancestor(anc, desc):
228 def _is_ancestor(anc, desc):
229 if anc > desc:
229 if anc > desc:
230 return False
230 return False
231 elif anc == desc:
231 elif anc == desc:
232 return True
232 return True
233 key = (anc, desc)
233 key = (anc, desc)
234 ret = cache.get(key)
234 ret = cache.get(key)
235 if ret is None:
235 if ret is None:
236 ret = cache[key] = is_ancestor(anc, desc)
236 ret = cache[key] = is_ancestor(anc, desc)
237 return ret
237 return ret
238
238
239 return _is_ancestor
239 return _is_ancestor
240
240
241
241
242 def _changesetforwardcopies(a, b, match):
242 def _changesetforwardcopies(a, b, match):
243 if a.rev() in (nullrev, b.rev()):
243 if a.rev() in (nullrev, b.rev()):
244 return {}
244 return {}
245
245
246 repo = a.repo().unfiltered()
246 repo = a.repo().unfiltered()
247 children = {}
247 children = {}
248
248
249 cl = repo.changelog
249 cl = repo.changelog
250 isancestor = cl.isancestorrev
250 isancestor = cl.isancestorrev
251
251
252 # To track rename from "A" to B, we need to gather all parent β†’ children
252 # To track rename from "A" to B, we need to gather all parent β†’ children
253 # edges that are contains in `::B` but not in `::A`.
253 # edges that are contains in `::B` but not in `::A`.
254 #
254 #
255 #
255 #
256 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
256 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
257 # ::a`) and also all the "roots point", ie the parents of the exclusive set
257 # ::a`) and also all the "roots point", ie the parents of the exclusive set
258 # that belong to ::a. These are exactly all the revisions needed to express
258 # that belong to ::a. These are exactly all the revisions needed to express
259 # the parent β†’ children we need to combine.
259 # the parent β†’ children we need to combine.
260 #
260 #
261 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
261 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
262 # excluding paths that leads to roots that are not ancestors of `a`. We
262 # excluding paths that leads to roots that are not ancestors of `a`. We
263 # keep this out of the explanation because it is hard enough without this special case..
263 # keep this out of the explanation because it is hard enough without this special case..
264
264
265 parents = cl._uncheckedparentrevs
265 parents = cl._uncheckedparentrevs
266 graph_roots = (nullrev, nullrev)
266 graph_roots = (nullrev, nullrev)
267
267
268 ancestors = cl.ancestors([a.rev()], inclusive=True)
268 ancestors = cl.ancestors([a.rev()], inclusive=True)
269 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
269 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
270 roots = set()
270 roots = set()
271 has_graph_roots = False
271 has_graph_roots = False
272
272
273 # iterate over `only(B, A)`
273 # iterate over `only(B, A)`
274 for r in revs:
274 for r in revs:
275 ps = parents(r)
275 ps = parents(r)
276 if ps == graph_roots:
276 if ps == graph_roots:
277 has_graph_roots = True
277 has_graph_roots = True
278 else:
278 else:
279 p1, p2 = ps
279 p1, p2 = ps
280
280
281 # find all the "root points" (see larger comment above)
281 # find all the "root points" (see larger comment above)
282 if p1 != nullrev and p1 in ancestors:
282 if p1 != nullrev and p1 in ancestors:
283 roots.add(p1)
283 roots.add(p1)
284 if p2 != nullrev and p2 in ancestors:
284 if p2 != nullrev and p2 in ancestors:
285 roots.add(p2)
285 roots.add(p2)
286 if not roots:
286 if not roots:
287 # no common revision to track copies from
287 # no common revision to track copies from
288 return {}
288 return {}
289 if has_graph_roots:
289 if has_graph_roots:
290 # this deal with the special case mentionned in the [1] footnotes. We
290 # this deal with the special case mentionned in the [1] footnotes. We
291 # must filter out revisions that leads to non-common graphroots.
291 # must filter out revisions that leads to non-common graphroots.
292 roots = list(roots)
292 roots = list(roots)
293 m = min(roots)
293 m = min(roots)
294 h = [b.rev()]
294 h = [b.rev()]
295 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
295 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
296 roots_to_head = set(roots_to_head)
296 roots_to_head = set(roots_to_head)
297 revs = [r for r in revs if r in roots_to_head]
297 revs = [r for r in revs if r in roots_to_head]
298
298
299 if repo.filecopiesmode == b'changeset-sidedata':
299 if repo.filecopiesmode == b'changeset-sidedata':
300 # When using side-data, we will process the edges "from" the children.
300 # When using side-data, we will process the edges "from" the children.
301 # We iterate over the childre, gathering previous collected data for
301 # We iterate over the childre, gathering previous collected data for
302 # the parents. Do know when the parents data is no longer necessary, we
302 # the parents. Do know when the parents data is no longer necessary, we
303 # keep a counter of how many children each revision has.
303 # keep a counter of how many children each revision has.
304 #
304 #
305 # An interresting property of `children_count` is that it only contains
305 # An interresting property of `children_count` is that it only contains
306 # revision that will be relevant for a edge of the graph. So if a
306 # revision that will be relevant for a edge of the graph. So if a
307 # children has parent not in `children_count`, that edges should not be
307 # children has parent not in `children_count`, that edges should not be
308 # processed.
308 # processed.
309 children_count = dict((r, 0) for r in roots)
309 children_count = dict((r, 0) for r in roots)
310 for r in revs:
310 for r in revs:
311 for p in cl.parentrevs(r):
311 for p in cl.parentrevs(r):
312 if p == nullrev:
312 if p == nullrev:
313 continue
313 continue
314 children_count[r] = 0
314 children_count[r] = 0
315 if p in children_count:
315 if p in children_count:
316 children_count[p] += 1
316 children_count[p] += 1
317 revinfo = _revinfo_getter(repo, match)
317 revinfo = _revinfo_getter(repo, match)
318 return _combine_changeset_copies(
318 return _combine_changeset_copies(
319 revs, children_count, b.rev(), revinfo, match, isancestor
319 revs, children_count, b.rev(), revinfo, match, isancestor
320 )
320 )
321 else:
321 else:
322 # When not using side-data, we will process the edges "from" the parent.
322 # When not using side-data, we will process the edges "from" the parent.
323 # so we need a full mapping of the parent -> children relation.
323 # so we need a full mapping of the parent -> children relation.
324 children = dict((r, []) for r in roots)
324 children = dict((r, []) for r in roots)
325 for r in revs:
325 for r in revs:
326 for p in cl.parentrevs(r):
326 for p in cl.parentrevs(r):
327 if p == nullrev:
327 if p == nullrev:
328 continue
328 continue
329 children[r] = []
329 children[r] = []
330 if p in children:
330 if p in children:
331 children[p].append(r)
331 children[p].append(r)
332 x = revs.pop()
332 x = revs.pop()
333 assert x == b.rev()
333 assert x == b.rev()
334 revs.extend(roots)
334 revs.extend(roots)
335 revs.sort()
335 revs.sort()
336
336
337 revinfo = _revinfo_getter_extra(repo)
337 revinfo = _revinfo_getter_extra(repo)
338 return _combine_changeset_copies_extra(
338 return _combine_changeset_copies_extra(
339 revs, children, b.rev(), revinfo, match, isancestor
339 revs, children, b.rev(), revinfo, match, isancestor
340 )
340 )
341
341
342
342
343 def _combine_changeset_copies(
343 def _combine_changeset_copies(
344 revs, children_count, targetrev, revinfo, match, isancestor
344 revs, children_count, targetrev, revinfo, match, isancestor
345 ):
345 ):
346 """combine the copies information for each item of iterrevs
346 """combine the copies information for each item of iterrevs
347
347
348 revs: sorted iterable of revision to visit
348 revs: sorted iterable of revision to visit
349 children_count: a {parent: <number-of-relevant-children>} mapping.
349 children_count: a {parent: <number-of-relevant-children>} mapping.
350 targetrev: the final copies destination revision (not in iterrevs)
350 targetrev: the final copies destination revision (not in iterrevs)
351 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
351 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
352 match: a matcher
352 match: a matcher
353
353
354 It returns the aggregated copies information for `targetrev`.
354 It returns the aggregated copies information for `targetrev`.
355 """
355 """
356
356
357 alwaysmatch = match.always()
357 alwaysmatch = match.always()
358
358
359 if rustmod is not None and alwaysmatch:
359 if rustmod is not None and alwaysmatch:
360 return rustmod.combine_changeset_copies(
360 final_copies = rustmod.combine_changeset_copies(
361 list(revs), children_count, targetrev, revinfo, isancestor
361 list(revs), children_count, targetrev, revinfo, isancestor
362 )
362 )
363
363 else:
364 isancestor = cached_is_ancestor(isancestor)
364 isancestor = cached_is_ancestor(isancestor)
365
366 all_copies = {}
367 # iterate over all the "children" side of copy tracing "edge"
368 for current_rev in revs:
369 p1, p2, changes = revinfo(current_rev)
370 current_copies = None
371
365
372 # iterate over all parents to chain the existing data with the
366 all_copies = {}
373 # data from the parent β†’ child edge.
367 # iterate over all the "children" side of copy tracing "edge"
374 for parent, parent_rev in ((1, p1), (2, p2)):
368 for current_rev in revs:
375 if parent_rev == nullrev:
369 p1, p2, changes = revinfo(current_rev)
376 continue
370 current_copies = None
377 remaining_children = children_count.get(parent_rev)
371 # iterate over all parents to chain the existing data with the
378 if remaining_children is None:
372 # data from the parent β†’ child edge.
379 continue
373 for parent, parent_rev in ((1, p1), (2, p2)):
380 remaining_children -= 1
374 if parent_rev == nullrev:
381 children_count[parent_rev] = remaining_children
375 continue
382 if remaining_children:
376 remaining_children = children_count.get(parent_rev)
383 copies = all_copies.get(parent_rev, None)
377 if remaining_children is None:
384 else:
378 continue
385 copies = all_copies.pop(parent_rev, None)
379 remaining_children -= 1
380 children_count[parent_rev] = remaining_children
381 if remaining_children:
382 copies = all_copies.get(parent_rev, None)
383 else:
384 copies = all_copies.pop(parent_rev, None)
386
385
387 if copies is None:
386 if copies is None:
388 # this is a root
387 # this is a root
389 copies = {}
388 copies = {}
390
389
391 newcopies = copies
390 newcopies = copies
392 # chain the data in the edge with the existing data
391 # chain the data in the edge with the existing data
393 if changes is not None:
392 if changes is not None:
394 childcopies = {}
393 childcopies = {}
395 if parent == 1:
394 if parent == 1:
396 childcopies = changes.copied_from_p1
395 childcopies = changes.copied_from_p1
397 elif parent == 2:
396 elif parent == 2:
398 childcopies = changes.copied_from_p2
397 childcopies = changes.copied_from_p2
399
398
400 if not alwaysmatch:
399 if not alwaysmatch:
401 childcopies = {
400 childcopies = {
402 dst: src
401 dst: src
403 for dst, src in childcopies.items()
402 for dst, src in childcopies.items()
404 if match(dst)
403 if match(dst)
405 }
404 }
406 if childcopies:
405 if childcopies:
407 newcopies = copies.copy()
408 for dest, source in pycompat.iteritems(childcopies):
409 prev = copies.get(source)
410 if prev is not None and prev[1] is not None:
411 source = prev[1]
412 newcopies[dest] = (current_rev, source)
413 assert newcopies is not copies
414 if changes.removed:
415 if newcopies is copies:
416 newcopies = copies.copy()
406 newcopies = copies.copy()
417 for f in changes.removed:
407 for dest, source in pycompat.iteritems(childcopies):
418 if f in newcopies:
408 prev = copies.get(source)
419 if newcopies is copies:
409 if prev is not None and prev[1] is not None:
420 # copy on write to avoid affecting potential other
410 source = prev[1]
421 # branches. when there are no other branches, this
411 newcopies[dest] = (current_rev, source)
422 # could be avoided.
412 assert newcopies is not copies
423 newcopies = copies.copy()
413 if changes.removed:
424 newcopies[f] = (current_rev, None)
414 if newcopies is copies:
415 newcopies = copies.copy()
416 for f in changes.removed:
417 if f in newcopies:
418 if newcopies is copies:
419 # copy on write to avoid affecting potential other
420 # branches. when there are no other branches, this
421 # could be avoided.
422 newcopies = copies.copy()
423 newcopies[f] = (current_rev, None)
424 # check potential need to combine the data from another parent (for
425 # that child). See comment below for details.
426 if current_copies is None:
427 current_copies = newcopies
428 elif current_copies is newcopies:
429 # nothing to merge:
430 pass
431 else:
432 # we are the second parent to work on c, we need to merge our
433 # work with the other.
434 #
435 # In case of conflict, parent 1 take precedence over parent 2.
436 # This is an arbitrary choice made anew when implementing
437 # changeset based copies. It was made without regards with
438 # potential filelog related behavior.
439 assert parent == 2
440 current_copies = _merge_copies_dict(
441 newcopies, current_copies, isancestor, changes
442 )
443 all_copies[current_rev] = current_copies
425
444
426 # check potential need to combine the data from another parent (for
445 # filter out internal details and return a {dest: source mapping}
427 # that child). See comment below for details.
446 final_copies = {}
428 if current_copies is None:
447 for dest, (tt, source) in all_copies[targetrev].items():
429 current_copies = newcopies
448 if source is not None:
430 elif current_copies is newcopies:
449 final_copies[dest] = source
431 # nothing to merge:
432 pass
433 else:
434 # we are the second parent to work on c, we need to merge our
435 # work with the other.
436 #
437 # In case of conflict, parent 1 take precedence over parent 2.
438 # This is an arbitrary choice made anew when implementing
439 # changeset based copies. It was made without regards with
440 # potential filelog related behavior.
441 assert parent == 2
442 current_copies = _merge_copies_dict(
443 newcopies, current_copies, isancestor, changes
444 )
445 all_copies[current_rev] = current_copies
446
447 # filter out internal details and return a {dest: source mapping}
448 final_copies = {}
449 for dest, (tt, source) in all_copies[targetrev].items():
450 if source is not None:
451 final_copies[dest] = source
452 return final_copies
450 return final_copies
453
451
454
452
455 def _merge_copies_dict(minor, major, isancestor, changes):
453 def _merge_copies_dict(minor, major, isancestor, changes):
456 """merge two copies-mapping together, minor and major
454 """merge two copies-mapping together, minor and major
457
455
458 In case of conflict, value from "major" will be picked.
456 In case of conflict, value from "major" will be picked.
459
457
460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
458 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 ancestors of `high_rev`,
459 ancestors of `high_rev`,
462
460
463 - `ismerged(path)`: callable return True if `path` have been merged in the
461 - `ismerged(path)`: callable return True if `path` have been merged in the
464 current revision,
462 current revision,
465
463
466 return the resulting dict (in practice, the "minor" object, updated)
464 return the resulting dict (in practice, the "minor" object, updated)
467 """
465 """
468 for dest, value in major.items():
466 for dest, value in major.items():
469 other = minor.get(dest)
467 other = minor.get(dest)
470 if other is None:
468 if other is None:
471 minor[dest] = value
469 minor[dest] = value
472 else:
470 else:
473 new_tt = value[0]
471 new_tt = value[0]
474 other_tt = other[0]
472 other_tt = other[0]
475 if value[1] == other[1]:
473 if value[1] == other[1]:
476 continue
474 continue
477 # content from "major" wins, unless it is older
475 # content from "major" wins, unless it is older
478 # than the branch point or there is a merge
476 # than the branch point or there is a merge
479 if new_tt == other_tt:
477 if new_tt == other_tt:
480 minor[dest] = value
478 minor[dest] = value
481 elif (
479 elif (
482 changes is not None
480 changes is not None
483 and value[1] is None
481 and value[1] is None
484 and dest in changes.salvaged
482 and dest in changes.salvaged
485 ):
483 ):
486 pass
484 pass
487 elif (
485 elif (
488 changes is not None
486 changes is not None
489 and other[1] is None
487 and other[1] is None
490 and dest in changes.salvaged
488 and dest in changes.salvaged
491 ):
489 ):
492 minor[dest] = value
490 minor[dest] = value
493 elif changes is not None and dest in changes.merged:
491 elif changes is not None and dest in changes.merged:
494 minor[dest] = value
492 minor[dest] = value
495 elif not isancestor(new_tt, other_tt):
493 elif not isancestor(new_tt, other_tt):
496 if value[1] is not None:
494 if value[1] is not None:
497 minor[dest] = value
495 minor[dest] = value
498 elif isancestor(other_tt, new_tt):
496 elif isancestor(other_tt, new_tt):
499 minor[dest] = value
497 minor[dest] = value
500 return minor
498 return minor
501
499
502
500
def _revinfo_getter_extra(repo):
    """Return a function that returns multiple data items for a given <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            # a file untouched by the changeset cannot have been merged in it
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            filelog_parents = fctx._filelog.parents(fctx._filenode)
            # merged iff the filelog node has two real (non-null) parents
            real_parents = [n for n in filelog_parents if n != nullid]
            return len(real_parents) >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
540
538
541
539
542 def _combine_changeset_copies_extra(
540 def _combine_changeset_copies_extra(
543 revs, children, targetrev, revinfo, match, isancestor
541 revs, children, targetrev, revinfo, match, isancestor
544 ):
542 ):
545 """version of `_combine_changeset_copies` that works with the Google
543 """version of `_combine_changeset_copies` that works with the Google
546 specific "extra" based storage for copy information"""
544 specific "extra" based storage for copy information"""
547 all_copies = {}
545 all_copies = {}
548 alwaysmatch = match.always()
546 alwaysmatch = match.always()
549 for r in revs:
547 for r in revs:
550 copies = all_copies.pop(r, None)
548 copies = all_copies.pop(r, None)
551 if copies is None:
549 if copies is None:
552 # this is a root
550 # this is a root
553 copies = {}
551 copies = {}
554 for i, c in enumerate(children[r]):
552 for i, c in enumerate(children[r]):
555 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
553 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
556 if r == p1:
554 if r == p1:
557 parent = 1
555 parent = 1
558 childcopies = p1copies
556 childcopies = p1copies
559 else:
557 else:
560 assert r == p2
558 assert r == p2
561 parent = 2
559 parent = 2
562 childcopies = p2copies
560 childcopies = p2copies
563 if not alwaysmatch:
561 if not alwaysmatch:
564 childcopies = {
562 childcopies = {
565 dst: src for dst, src in childcopies.items() if match(dst)
563 dst: src for dst, src in childcopies.items() if match(dst)
566 }
564 }
567 newcopies = copies
565 newcopies = copies
568 if childcopies:
566 if childcopies:
569 newcopies = copies.copy()
567 newcopies = copies.copy()
570 for dest, source in pycompat.iteritems(childcopies):
568 for dest, source in pycompat.iteritems(childcopies):
571 prev = copies.get(source)
569 prev = copies.get(source)
572 if prev is not None and prev[1] is not None:
570 if prev is not None and prev[1] is not None:
573 source = prev[1]
571 source = prev[1]
574 newcopies[dest] = (c, source)
572 newcopies[dest] = (c, source)
575 assert newcopies is not copies
573 assert newcopies is not copies
576 for f in removed:
574 for f in removed:
577 if f in newcopies:
575 if f in newcopies:
578 if newcopies is copies:
576 if newcopies is copies:
579 # copy on write to avoid affecting potential other
577 # copy on write to avoid affecting potential other
580 # branches. when there are no other branches, this
578 # branches. when there are no other branches, this
581 # could be avoided.
579 # could be avoided.
582 newcopies = copies.copy()
580 newcopies = copies.copy()
583 newcopies[f] = (c, None)
581 newcopies[f] = (c, None)
584 othercopies = all_copies.get(c)
582 othercopies = all_copies.get(c)
585 if othercopies is None:
583 if othercopies is None:
586 all_copies[c] = newcopies
584 all_copies[c] = newcopies
587 else:
585 else:
588 # we are the second parent to work on c, we need to merge our
586 # we are the second parent to work on c, we need to merge our
589 # work with the other.
587 # work with the other.
590 #
588 #
591 # In case of conflict, parent 1 take precedence over parent 2.
589 # In case of conflict, parent 1 take precedence over parent 2.
592 # This is an arbitrary choice made anew when implementing
590 # This is an arbitrary choice made anew when implementing
593 # changeset based copies. It was made without regards with
591 # changeset based copies. It was made without regards with
594 # potential filelog related behavior.
592 # potential filelog related behavior.
595 if parent == 1:
593 if parent == 1:
596 _merge_copies_dict_extra(
594 _merge_copies_dict_extra(
597 othercopies, newcopies, isancestor, ismerged
595 othercopies, newcopies, isancestor, ismerged
598 )
596 )
599 else:
597 else:
600 _merge_copies_dict_extra(
598 _merge_copies_dict_extra(
601 newcopies, othercopies, isancestor, ismerged
599 newcopies, othercopies, isancestor, ismerged
602 )
600 )
603 all_copies[c] = newcopies
601 all_copies[c] = newcopies
604
602
605 final_copies = {}
603 final_copies = {}
606 for dest, (tt, source) in all_copies[targetrev].items():
604 for dest, (tt, source) in all_copies[targetrev].items():
607 if source is not None:
605 if source is not None:
608 final_copies[dest] = source
606 final_copies[dest] = source
609 return final_copies
607 return final_copies
610
608
611
609
612 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
610 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
613 """version of `_merge_copies_dict` that works with the Google
611 """version of `_merge_copies_dict` that works with the Google
614 specific "extra" based storage for copy information"""
612 specific "extra" based storage for copy information"""
615 for dest, value in major.items():
613 for dest, value in major.items():
616 other = minor.get(dest)
614 other = minor.get(dest)
617 if other is None:
615 if other is None:
618 minor[dest] = value
616 minor[dest] = value
619 else:
617 else:
620 new_tt = value[0]
618 new_tt = value[0]
621 other_tt = other[0]
619 other_tt = other[0]
622 if value[1] == other[1]:
620 if value[1] == other[1]:
623 continue
621 continue
624 # content from "major" wins, unless it is older
622 # content from "major" wins, unless it is older
625 # than the branch point or there is a merge
623 # than the branch point or there is a merge
626 if (
624 if (
627 new_tt == other_tt
625 new_tt == other_tt
628 or not isancestor(new_tt, other_tt)
626 or not isancestor(new_tt, other_tt)
629 or ismerged(dest)
627 or ismerged(dest)
630 ):
628 ):
631 minor[dest] = value
629 minor[dest] = value
632
630
633
631
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: compute the committed copies up to its first
    # parent, then layer the dirstate-recorded copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
648
646
649
647
def _backwardrenames(a, b, match):
    # copy tracing disabled entirely
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # remove copies: a source still present in `a` was copied, not
        # renamed, so it produces no backward rename
        if src in a:
            continue
        backward[src] = dst
    return backward
670
668
671
669
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty endpoints: nothing to trace
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: trace forward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace backward only
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated lineages: walk down to the common ancestor, then up
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
708
706
709
707
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
778
776
779
777
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: judge by its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # at least one public (immutable) endpoint: stay with heuristics
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
798
796
799
797
800 def _checksinglesidecopies(
798 def _checksinglesidecopies(
801 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
799 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
802 ):
800 ):
803 if src not in m2:
801 if src not in m2:
804 # deleted on side 2
802 # deleted on side 2
805 if src not in m1:
803 if src not in m1:
806 # renamed on side 1, deleted on side 2
804 # renamed on side 1, deleted on side 2
807 renamedelete[src] = dsts1
805 renamedelete[src] = dsts1
808 elif src not in mb:
806 elif src not in mb:
809 # Work around the "short-circuit to avoid issues with merge states"
807 # Work around the "short-circuit to avoid issues with merge states"
810 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
808 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
811 # destination doesn't exist in y.
809 # destination doesn't exist in y.
812 pass
810 pass
813 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
811 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
814 return
812 return
815 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
813 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
816 # modified on side 2
814 # modified on side 2
817 for dst in dsts1:
815 for dst in dsts1:
818 copy[dst] = src
816 copy[dst] = src
819
817
820
818
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # default each mapping to a fresh empty dict; a mutable default
        # argument would be shared across instances
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
855
853
856
854
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing further to compute
        return branch_copies(), branch_copies(), {}

    # invert both mappings: source -> [destinations]
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests, lazily: the manifest diffs
    # are only needed for directory-rename detection and debug output
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        # NOTE(review): exact whitespace inside these debug byte strings was
        # collapsed in the rendered source; restored to upstream spacing.
        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
995
993
996
994
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Find moved directories and the files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side
        (compared to ctx)

    Returns a (dirmove, movewithdir) pair of dicts.
    """
    # Candidate whole-directory moves (src dir -> dst dir); 'ambiguous'
    # collects source directories that have been ruled out.
    ambiguous = set()
    dirmove = {}

    # A directory move is detected when all the files of a directory were
    # copied/moved into one single new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in ambiguous:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            ambiguous.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # files from the same directory moved to two different places
            ambiguous.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    for d in ambiguous:
        dirmove.pop(d, None)
    del ambiguous

    if not dirmove:
        return {}, {}

    # Normalize to trailing-slash form so the prefix checks below match
    # whole directory components only.
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # Files added on the other side inside a moved directory should be
    # moved along with it.
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if f.startswith(d):
                # new file added in a directory that was moved, move it
                df = dirmove[d] + f[len(d) :]
                if df not in copy:
                    movewithdir[f] = df
                    repo.ui.debug(
                        b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                    )
                break

    return dirmove, movewithdir
1060
1058
1061
1059
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Moves and renames are assumed to be one of two kinds:

    1) inside a single directory (same directory name, different filenames)
    2) across directories (same filenames, different directory names)

    This only works when the "source branch" (the commits from base up to
    c2, excluding base) contains no merge commits; whenever a merge is
    encountered the function falls back to _fullcopytracing().

    Enabled with the config:

        [experimental]
        copytrace = heuristics

    The heuristics can produce a very large number of copy/move candidates,
    which slows the algorithm down; the number of candidates checked per
    file is bounded by `experimental.copytrace.movecandidateslimit`
    (default 100).
    """
    # Copy tracing operates on committed revisions; substitute the first
    # parent for any working-directory context.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()

    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Gather every file touched between base and c2, refusing merges.
    changedfiles = set()
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(cursor.files())
        cursor = cursor.p1()

    cp = _forwardcopies(base, c2)
    copies2 = {dst: src for dst, src in pycompat.iteritems(cp) if src in m1}

    # A file is "missing" if it is absent from the destination (m1) but
    # present in the base (this excludes added files) and in the source c2
    # (this excludes removed files).
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index the files that appeared on the c1 side by basename and by
        # directory, so move candidates can be looked up quickly.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        for f in missingfiles:
            candidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1169
1167
1170
1168
1171 def _related(f1, f2):
1169 def _related(f1, f2):
1172 """return True if f1 and f2 filectx have a common ancestor
1170 """return True if f1 and f2 filectx have a common ancestor
1173
1171
1174 Walk back to common ancestor to see if the two files originate
1172 Walk back to common ancestor to see if the two files originate
1175 from the same file. Since workingfilectx's rev() is None it messes
1173 from the same file. Since workingfilectx's rev() is None it messes
1176 up the integer comparison logic, hence the pre-step check for
1174 up the integer comparison logic, hence the pre-step check for
1177 None (f1 and f2 can only be workingfilectx's initially).
1175 None (f1 and f2 can only be workingfilectx's initially).
1178 """
1176 """
1179
1177
1180 if f1 == f2:
1178 if f1 == f2:
1181 return True # a match
1179 return True # a match
1182
1180
1183 g1, g2 = f1.ancestors(), f2.ancestors()
1181 g1, g2 = f1.ancestors(), f2.ancestors()
1184 try:
1182 try:
1185 f1r, f2r = f1.linkrev(), f2.linkrev()
1183 f1r, f2r = f1.linkrev(), f2.linkrev()
1186
1184
1187 if f1r is None:
1185 if f1r is None:
1188 f1 = next(g1)
1186 f1 = next(g1)
1189 if f2r is None:
1187 if f2r is None:
1190 f2 = next(g2)
1188 f2 = next(g2)
1191
1189
1192 while True:
1190 while True:
1193 f1r, f2r = f1.linkrev(), f2.linkrev()
1191 f1r, f2r = f1.linkrev(), f2.linkrev()
1194 if f1r > f2r:
1192 if f1r > f2r:
1195 f1 = next(g1)
1193 f1 = next(g1)
1196 elif f2r > f1r:
1194 elif f2r > f1r:
1197 f2 = next(g2)
1195 f2 = next(g2)
1198 else: # f1 and f2 point to files in the same linkrev
1196 else: # f1 and f2 point to files in the same linkrev
1199 return f1 == f2 # true if they point to the same file
1197 return f1 == f2 # true if they point to the same file
1200 except StopIteration:
1198 except StopIteration:
1201 return False
1199 return False
1202
1200
1203
1201
def graftcopies(wctx, ctx, base):
    """Reproduce in the working copy the copies made between base and ctx.

    Unlike mergecopies(), only copies between base and ctx are considered;
    copies between base and wctx are ignored. Also unlike mergecopies(),
    the copies are applied to the working copy (instead of merely being
    reported), which makes this cheaper (especially in the common case of
    base == ctx.p1()) and useful even when experimental.copytrace=off.

    merge.update() will already have marked most copies, but only when it
    considers the source files related (see merge._related()) and only for
    files that were modified on the local side. This function records the
    copies that merge.update() "missed".
    """
    copies = pathcopies(base, ctx)
    # Drop copy records that are invalid relative to the working copy parent.
    _filter(wctx.p1(), wctx, copies)
    for dest, source in pycompat.iteritems(copies):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now