##// END OF EJS Templates
copies: stop attempt to avoid extra dict copies around branching...
marmoute -
r46800:cb8b2ee8 default
parent child Browse files
Show More
@@ -1,1228 +1,1225 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
73 def _chain(prefix, suffix):
73 def _chain(prefix, suffix):
74 """chain two sets of copies 'prefix' and 'suffix'"""
74 """chain two sets of copies 'prefix' and 'suffix'"""
75 result = prefix.copy()
75 result = prefix.copy()
76 for key, value in pycompat.iteritems(suffix):
76 for key, value in pycompat.iteritems(suffix):
77 result[key] = prefix.get(value, value)
77 result[key] = prefix.get(value, value)
78 return result
78 return result
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
116 def usechangesetcentricalgo(repo):
116 def usechangesetcentricalgo(repo):
117 """Checks if we should use changeset-centric copy algorithms"""
117 """Checks if we should use changeset-centric copy algorithms"""
118 if repo.filecopiesmode == b'changeset-sidedata':
118 if repo.filecopiesmode == b'changeset-sidedata':
119 return True
119 return True
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
121 changesetsource = (b'changeset-only', b'compatibility')
121 changesetsource = (b'changeset-only', b'compatibility')
122 return readfrom in changesetsource
122 return readfrom in changesetsource
123
123
124
124
125 def _committedforwardcopies(a, b, base, match):
125 def _committedforwardcopies(a, b, base, match):
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
127 # files might have to be traced back to the fctx parent of the last
127 # files might have to be traced back to the fctx parent of the last
128 # one-side-only changeset, but not further back than that
128 # one-side-only changeset, but not further back than that
129 repo = a._repo
129 repo = a._repo
130
130
131 if usechangesetcentricalgo(repo):
131 if usechangesetcentricalgo(repo):
132 return _changesetforwardcopies(a, b, match)
132 return _changesetforwardcopies(a, b, match)
133
133
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
135 dbg = repo.ui.debug
135 dbg = repo.ui.debug
136 if debug:
136 if debug:
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
138 am = a.manifest()
138 am = a.manifest()
139 basemf = None if base is None else base.manifest()
139 basemf = None if base is None else base.manifest()
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145
145
146 # Computing the forward missing is quite expensive on large manifests, since
146 # Computing the forward missing is quite expensive on large manifests, since
147 # it compares the entire manifests. We can optimize it in the common use
147 # it compares the entire manifests. We can optimize it in the common use
148 # case of computing what copies are in a commit versus its parent (like
148 # case of computing what copies are in a commit versus its parent (like
149 # during a rebase or histedit). Note, we exclude merge commits from this
149 # during a rebase or histedit). Note, we exclude merge commits from this
150 # optimization, since the ctx.files() for a merge commit is not correct for
150 # optimization, since the ctx.files() for a merge commit is not correct for
151 # this comparison.
151 # this comparison.
152 forwardmissingmatch = match
152 forwardmissingmatch = match
153 if b.p1() == a and b.p2().node() == nullid:
153 if b.p1() == a and b.p2().node() == nullid:
154 filesmatcher = matchmod.exact(b.files())
154 filesmatcher = matchmod.exact(b.files())
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
156 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
157
157
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
158 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
159
159
160 if debug:
160 if debug:
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
161 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
162
162
163 for f in sorted(missing):
163 for f in sorted(missing):
164 if debug:
164 if debug:
165 dbg(b'debug.copies: tracing file: %s\n' % f)
165 dbg(b'debug.copies: tracing file: %s\n' % f)
166 fctx = b[f]
166 fctx = b[f]
167 fctx._ancestrycontext = ancestrycontext
167 fctx._ancestrycontext = ancestrycontext
168
168
169 if debug:
169 if debug:
170 start = util.timer()
170 start = util.timer()
171 opath = _tracefile(fctx, am, basemf)
171 opath = _tracefile(fctx, am, basemf)
172 if opath:
172 if opath:
173 if debug:
173 if debug:
174 dbg(b'debug.copies: rename of: %s\n' % opath)
174 dbg(b'debug.copies: rename of: %s\n' % opath)
175 cm[f] = opath
175 cm[f] = opath
176 if debug:
176 if debug:
177 dbg(
177 dbg(
178 b'debug.copies: time: %f seconds\n'
178 b'debug.copies: time: %f seconds\n'
179 % (util.timer() - start)
179 % (util.timer() - start)
180 )
180 )
181 return cm
181 return cm
182
182
183
183
184 def _revinfo_getter(repo, match):
184 def _revinfo_getter(repo, match):
185 """returns a function that returns the following data given a <rev>"
185 """returns a function that returns the following data given a <rev>"
186
186
187 * p1: revision number of first parent
187 * p1: revision number of first parent
188 * p2: revision number of first parent
188 * p2: revision number of first parent
189 * changes: a ChangingFiles object
189 * changes: a ChangingFiles object
190 """
190 """
191 cl = repo.changelog
191 cl = repo.changelog
192 parents = cl.parentrevs
192 parents = cl.parentrevs
193 flags = cl.flags
193 flags = cl.flags
194
194
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
195 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
196
196
197 changelogrevision = cl.changelogrevision
197 changelogrevision = cl.changelogrevision
198
198
199 if rustmod is not None:
199 if rustmod is not None:
200
200
201 def revinfo(rev):
201 def revinfo(rev):
202 p1, p2 = parents(rev)
202 p1, p2 = parents(rev)
203 if flags(rev) & HASCOPIESINFO:
203 if flags(rev) & HASCOPIESINFO:
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
204 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
205 else:
205 else:
206 raw = None
206 raw = None
207 return (p1, p2, raw)
207 return (p1, p2, raw)
208
208
209 else:
209 else:
210
210
211 def revinfo(rev):
211 def revinfo(rev):
212 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
213 if flags(rev) & HASCOPIESINFO:
213 if flags(rev) & HASCOPIESINFO:
214 changes = changelogrevision(rev).changes
214 changes = changelogrevision(rev).changes
215 else:
215 else:
216 changes = None
216 changes = None
217 return (p1, p2, changes)
217 return (p1, p2, changes)
218
218
219 return revinfo
219 return revinfo
220
220
221
221
222 def cached_is_ancestor(is_ancestor):
222 def cached_is_ancestor(is_ancestor):
223 """return a cached version of is_ancestor"""
223 """return a cached version of is_ancestor"""
224 cache = {}
224 cache = {}
225
225
226 def _is_ancestor(anc, desc):
226 def _is_ancestor(anc, desc):
227 if anc > desc:
227 if anc > desc:
228 return False
228 return False
229 elif anc == desc:
229 elif anc == desc:
230 return True
230 return True
231 key = (anc, desc)
231 key = (anc, desc)
232 ret = cache.get(key)
232 ret = cache.get(key)
233 if ret is None:
233 if ret is None:
234 ret = cache[key] = is_ancestor(anc, desc)
234 ret = cache[key] = is_ancestor(anc, desc)
235 return ret
235 return ret
236
236
237 return _is_ancestor
237 return _is_ancestor
238
238
239
239
240 def _changesetforwardcopies(a, b, match):
240 def _changesetforwardcopies(a, b, match):
241 if a.rev() in (nullrev, b.rev()):
241 if a.rev() in (nullrev, b.rev()):
242 return {}
242 return {}
243
243
244 repo = a.repo().unfiltered()
244 repo = a.repo().unfiltered()
245 children = {}
245 children = {}
246
246
247 cl = repo.changelog
247 cl = repo.changelog
248 isancestor = cl.isancestorrev
248 isancestor = cl.isancestorrev
249
249
250 # To track rename from "A" to B, we need to gather all parent β†’ children
250 # To track rename from "A" to B, we need to gather all parent β†’ children
251 # edges that are contains in `::B` but not in `::A`.
251 # edges that are contains in `::B` but not in `::A`.
252 #
252 #
253 #
253 #
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
254 # To do so, we need to gather all revisions exclusiveΒΉ to "B" (ieΒΉ: `::b -
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
255 # ::a`) and also all the "roots point", ie the parents of the exclusive set
256 # that belong to ::a. These are exactly all the revisions needed to express
256 # that belong to ::a. These are exactly all the revisions needed to express
257 # the parent β†’ children we need to combine.
257 # the parent β†’ children we need to combine.
258 #
258 #
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
259 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
260 # excluding paths that leads to roots that are not ancestors of `a`. We
260 # excluding paths that leads to roots that are not ancestors of `a`. We
261 # keep this out of the explanation because it is hard enough without this special case..
261 # keep this out of the explanation because it is hard enough without this special case..
262
262
263 parents = cl._uncheckedparentrevs
263 parents = cl._uncheckedparentrevs
264 graph_roots = (nullrev, nullrev)
264 graph_roots = (nullrev, nullrev)
265
265
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
266 ancestors = cl.ancestors([a.rev()], inclusive=True)
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
267 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
268 roots = set()
268 roots = set()
269 has_graph_roots = False
269 has_graph_roots = False
270
270
271 # iterate over `only(B, A)`
271 # iterate over `only(B, A)`
272 for r in revs:
272 for r in revs:
273 ps = parents(r)
273 ps = parents(r)
274 if ps == graph_roots:
274 if ps == graph_roots:
275 has_graph_roots = True
275 has_graph_roots = True
276 else:
276 else:
277 p1, p2 = ps
277 p1, p2 = ps
278
278
279 # find all the "root points" (see larger comment above)
279 # find all the "root points" (see larger comment above)
280 if p1 != nullrev and p1 in ancestors:
280 if p1 != nullrev and p1 in ancestors:
281 roots.add(p1)
281 roots.add(p1)
282 if p2 != nullrev and p2 in ancestors:
282 if p2 != nullrev and p2 in ancestors:
283 roots.add(p2)
283 roots.add(p2)
284 if not roots:
284 if not roots:
285 # no common revision to track copies from
285 # no common revision to track copies from
286 return {}
286 return {}
287 if has_graph_roots:
287 if has_graph_roots:
288 # this deal with the special case mentionned in the [1] footnotes. We
288 # this deal with the special case mentionned in the [1] footnotes. We
289 # must filter out revisions that leads to non-common graphroots.
289 # must filter out revisions that leads to non-common graphroots.
290 roots = list(roots)
290 roots = list(roots)
291 m = min(roots)
291 m = min(roots)
292 h = [b.rev()]
292 h = [b.rev()]
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
293 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
294 roots_to_head = set(roots_to_head)
294 roots_to_head = set(roots_to_head)
295 revs = [r for r in revs if r in roots_to_head]
295 revs = [r for r in revs if r in roots_to_head]
296
296
297 if repo.filecopiesmode == b'changeset-sidedata':
297 if repo.filecopiesmode == b'changeset-sidedata':
298 # When using side-data, we will process the edges "from" the children.
298 # When using side-data, we will process the edges "from" the children.
299 # We iterate over the childre, gathering previous collected data for
299 # We iterate over the childre, gathering previous collected data for
300 # the parents. Do know when the parents data is no longer necessary, we
300 # the parents. Do know when the parents data is no longer necessary, we
301 # keep a counter of how many children each revision has.
301 # keep a counter of how many children each revision has.
302 #
302 #
303 # An interresting property of `children_count` is that it only contains
303 # An interresting property of `children_count` is that it only contains
304 # revision that will be relevant for a edge of the graph. So if a
304 # revision that will be relevant for a edge of the graph. So if a
305 # children has parent not in `children_count`, that edges should not be
305 # children has parent not in `children_count`, that edges should not be
306 # processed.
306 # processed.
307 children_count = dict((r, 0) for r in roots)
307 children_count = dict((r, 0) for r in roots)
308 for r in revs:
308 for r in revs:
309 for p in cl.parentrevs(r):
309 for p in cl.parentrevs(r):
310 if p == nullrev:
310 if p == nullrev:
311 continue
311 continue
312 children_count[r] = 0
312 children_count[r] = 0
313 if p in children_count:
313 if p in children_count:
314 children_count[p] += 1
314 children_count[p] += 1
315 revinfo = _revinfo_getter(repo, match)
315 revinfo = _revinfo_getter(repo, match)
316 return _combine_changeset_copies(
316 return _combine_changeset_copies(
317 revs, children_count, b.rev(), revinfo, match, isancestor
317 revs, children_count, b.rev(), revinfo, match, isancestor
318 )
318 )
319 else:
319 else:
320 # When not using side-data, we will process the edges "from" the parent.
320 # When not using side-data, we will process the edges "from" the parent.
321 # so we need a full mapping of the parent -> children relation.
321 # so we need a full mapping of the parent -> children relation.
322 children = dict((r, []) for r in roots)
322 children = dict((r, []) for r in roots)
323 for r in revs:
323 for r in revs:
324 for p in cl.parentrevs(r):
324 for p in cl.parentrevs(r):
325 if p == nullrev:
325 if p == nullrev:
326 continue
326 continue
327 children[r] = []
327 children[r] = []
328 if p in children:
328 if p in children:
329 children[p].append(r)
329 children[p].append(r)
330 x = revs.pop()
330 x = revs.pop()
331 assert x == b.rev()
331 assert x == b.rev()
332 revs.extend(roots)
332 revs.extend(roots)
333 revs.sort()
333 revs.sort()
334
334
335 revinfo = _revinfo_getter_extra(repo)
335 revinfo = _revinfo_getter_extra(repo)
336 return _combine_changeset_copies_extra(
336 return _combine_changeset_copies_extra(
337 revs, children, b.rev(), revinfo, match, isancestor
337 revs, children, b.rev(), revinfo, match, isancestor
338 )
338 )
339
339
340
340
341 def _combine_changeset_copies(
341 def _combine_changeset_copies(
342 revs, children_count, targetrev, revinfo, match, isancestor
342 revs, children_count, targetrev, revinfo, match, isancestor
343 ):
343 ):
344 """combine the copies information for each item of iterrevs
344 """combine the copies information for each item of iterrevs
345
345
346 revs: sorted iterable of revision to visit
346 revs: sorted iterable of revision to visit
347 children_count: a {parent: <number-of-relevant-children>} mapping.
347 children_count: a {parent: <number-of-relevant-children>} mapping.
348 targetrev: the final copies destination revision (not in iterrevs)
348 targetrev: the final copies destination revision (not in iterrevs)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
349 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
350 match: a matcher
350 match: a matcher
351
351
352 It returns the aggregated copies information for `targetrev`.
352 It returns the aggregated copies information for `targetrev`.
353 """
353 """
354
354
355 alwaysmatch = match.always()
355 alwaysmatch = match.always()
356
356
357 if rustmod is not None:
357 if rustmod is not None:
358 final_copies = rustmod.combine_changeset_copies(
358 final_copies = rustmod.combine_changeset_copies(
359 list(revs), children_count, targetrev, revinfo, isancestor
359 list(revs), children_count, targetrev, revinfo, isancestor
360 )
360 )
361 else:
361 else:
362 isancestor = cached_is_ancestor(isancestor)
362 isancestor = cached_is_ancestor(isancestor)
363
363
364 all_copies = {}
364 all_copies = {}
365 # iterate over all the "children" side of copy tracing "edge"
365 # iterate over all the "children" side of copy tracing "edge"
366 for current_rev in revs:
366 for current_rev in revs:
367 p1, p2, changes = revinfo(current_rev)
367 p1, p2, changes = revinfo(current_rev)
368 current_copies = None
368 current_copies = None
369 # iterate over all parents to chain the existing data with the
369 # iterate over all parents to chain the existing data with the
370 # data from the parent β†’ child edge.
370 # data from the parent β†’ child edge.
371 for parent, parent_rev in ((1, p1), (2, p2)):
371 for parent, parent_rev in ((1, p1), (2, p2)):
372 if parent_rev == nullrev:
372 if parent_rev == nullrev:
373 continue
373 continue
374 remaining_children = children_count.get(parent_rev)
374 remaining_children = children_count.get(parent_rev)
375 if remaining_children is None:
375 if remaining_children is None:
376 continue
376 continue
377 remaining_children -= 1
377 remaining_children -= 1
378 children_count[parent_rev] = remaining_children
378 children_count[parent_rev] = remaining_children
379 if remaining_children:
379 if remaining_children:
380 copies = all_copies.get(parent_rev, None)
380 copies = all_copies.get(parent_rev, None)
381 else:
381 else:
382 copies = all_copies.pop(parent_rev, None)
382 copies = all_copies.pop(parent_rev, None)
383
383
384 if copies is None:
384 if copies is None:
385 # this is a root
385 # this is a root
386 copies = {}
386 newcopies = copies = {}
387
387 elif remaining_children:
388 newcopies = copies.copy()
389 else:
388 newcopies = copies
390 newcopies = copies
389 # chain the data in the edge with the existing data
391 # chain the data in the edge with the existing data
390 if changes is not None:
392 if changes is not None:
391 childcopies = {}
393 childcopies = {}
392 if parent == 1:
394 if parent == 1:
393 childcopies = changes.copied_from_p1
395 childcopies = changes.copied_from_p1
394 elif parent == 2:
396 elif parent == 2:
395 childcopies = changes.copied_from_p2
397 childcopies = changes.copied_from_p2
396
398
397 if childcopies:
399 if childcopies:
398 newcopies = copies.copy()
400 newcopies = copies.copy()
399 for dest, source in pycompat.iteritems(childcopies):
401 for dest, source in pycompat.iteritems(childcopies):
400 prev = copies.get(source)
402 prev = copies.get(source)
401 if prev is not None and prev[1] is not None:
403 if prev is not None and prev[1] is not None:
402 source = prev[1]
404 source = prev[1]
403 newcopies[dest] = (current_rev, source)
405 newcopies[dest] = (current_rev, source)
404 assert newcopies is not copies
406 assert newcopies is not copies
405 if changes.removed:
407 if changes.removed:
406 if newcopies is copies:
407 newcopies = copies.copy()
408 for f in changes.removed:
408 for f in changes.removed:
409 if f in newcopies:
409 if f in newcopies:
410 if newcopies is copies:
410 if newcopies is copies:
411 # copy on write to avoid affecting potential other
411 # copy on write to avoid affecting potential other
412 # branches. when there are no other branches, this
412 # branches. when there are no other branches, this
413 # could be avoided.
413 # could be avoided.
414 newcopies = copies.copy()
414 newcopies = copies.copy()
415 newcopies[f] = (current_rev, None)
415 newcopies[f] = (current_rev, None)
416 # check potential need to combine the data from another parent (for
416 # check potential need to combine the data from another parent (for
417 # that child). See comment below for details.
417 # that child). See comment below for details.
418 if current_copies is None:
418 if current_copies is None:
419 current_copies = newcopies
419 current_copies = newcopies
420 elif current_copies is newcopies:
421 # nothing to merge:
422 pass
423 else:
420 else:
424 # we are the second parent to work on c, we need to merge our
421 # we are the second parent to work on c, we need to merge our
425 # work with the other.
422 # work with the other.
426 #
423 #
427 # In case of conflict, parent 1 take precedence over parent 2.
424 # In case of conflict, parent 1 take precedence over parent 2.
428 # This is an arbitrary choice made anew when implementing
425 # This is an arbitrary choice made anew when implementing
429 # changeset based copies. It was made without regards with
426 # changeset based copies. It was made without regards with
430 # potential filelog related behavior.
427 # potential filelog related behavior.
431 assert parent == 2
428 assert parent == 2
432 current_copies = _merge_copies_dict(
429 current_copies = _merge_copies_dict(
433 newcopies, current_copies, isancestor, changes
430 newcopies, current_copies, isancestor, changes
434 )
431 )
435 all_copies[current_rev] = current_copies
432 all_copies[current_rev] = current_copies
436
433
437 # filter out internal details and return a {dest: source mapping}
434 # filter out internal details and return a {dest: source mapping}
438 final_copies = {}
435 final_copies = {}
439 for dest, (tt, source) in all_copies[targetrev].items():
436 for dest, (tt, source) in all_copies[targetrev].items():
440 if source is not None:
437 if source is not None:
441 final_copies[dest] = source
438 final_copies[dest] = source
442 if not alwaysmatch:
439 if not alwaysmatch:
443 for filename in list(final_copies.keys()):
440 for filename in list(final_copies.keys()):
444 if not match(filename):
441 if not match(filename):
445 del final_copies[filename]
442 del final_copies[filename]
446 return final_copies
443 return final_copies
447
444
448
445
449 # constant to decide which side to pick with _merge_copies_dict
446 # constant to decide which side to pick with _merge_copies_dict
450 PICK_MINOR = 0
447 PICK_MINOR = 0
451 PICK_MAJOR = 1
448 PICK_MAJOR = 1
452 PICK_EITHER = 2
449 PICK_EITHER = 2
453
450
454
451
455 def _merge_copies_dict(minor, major, isancestor, changes):
452 def _merge_copies_dict(minor, major, isancestor, changes):
456 """merge two copies-mapping together, minor and major
453 """merge two copies-mapping together, minor and major
457
454
458 In case of conflict, value from "major" will be picked.
455 In case of conflict, value from "major" will be picked.
459
456
460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
457 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 ancestors of `high_rev`,
458 ancestors of `high_rev`,
462
459
463 - `ismerged(path)`: callable return True if `path` have been merged in the
460 - `ismerged(path)`: callable return True if `path` have been merged in the
464 current revision,
461 current revision,
465
462
466 return the resulting dict (in practice, the "minor" object, updated)
463 return the resulting dict (in practice, the "minor" object, updated)
467 """
464 """
468 for dest, value in major.items():
465 for dest, value in major.items():
469 other = minor.get(dest)
466 other = minor.get(dest)
470 if other is None:
467 if other is None:
471 minor[dest] = value
468 minor[dest] = value
472 else:
469 else:
473 pick = _compare_values(changes, isancestor, dest, other, value)
470 pick = _compare_values(changes, isancestor, dest, other, value)
474 if pick == PICK_MAJOR:
471 if pick == PICK_MAJOR:
475 minor[dest] = value
472 minor[dest] = value
476 return minor
473 return minor
477
474
478
475
479 def _compare_values(changes, isancestor, dest, minor, major):
476 def _compare_values(changes, isancestor, dest, minor, major):
480 """compare two value within a _merge_copies_dict loop iteration"""
477 """compare two value within a _merge_copies_dict loop iteration"""
481 major_tt, major_value = major
478 major_tt, major_value = major
482 minor_tt, minor_value = minor
479 minor_tt, minor_value = minor
483
480
484 # evacuate some simple case first:
481 # evacuate some simple case first:
485 if major_tt == minor_tt:
482 if major_tt == minor_tt:
486 # if it comes from the same revision it must be the same value
483 # if it comes from the same revision it must be the same value
487 assert major_value == minor_value
484 assert major_value == minor_value
488 return PICK_EITHER
485 return PICK_EITHER
489 elif major[1] == minor[1]:
486 elif major[1] == minor[1]:
490 return PICK_EITHER
487 return PICK_EITHER
491
488
492 # actual merging needed: content from "major" wins, unless it is older than
489 # actual merging needed: content from "major" wins, unless it is older than
493 # the branch point or there is a merge
490 # the branch point or there is a merge
494 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 elif changes is not None and major[1] is None and dest in changes.salvaged:
495 return PICK_MINOR
492 return PICK_MINOR
496 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 elif changes is not None and minor[1] is None and dest in changes.salvaged:
497 return PICK_MAJOR
494 return PICK_MAJOR
498 elif changes is not None and dest in changes.merged:
495 elif changes is not None and dest in changes.merged:
499 return PICK_MAJOR
496 return PICK_MAJOR
500 elif not isancestor(major_tt, minor_tt):
497 elif not isancestor(major_tt, minor_tt):
501 if major[1] is not None:
498 if major[1] is not None:
502 return PICK_MAJOR
499 return PICK_MAJOR
503 elif isancestor(minor_tt, major_tt):
500 elif isancestor(minor_tt, major_tt):
504 return PICK_MAJOR
501 return PICK_MAJOR
505 return PICK_MINOR
502 return PICK_MINOR
506
503
507
504
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build a per-revision predicate telling whether a path was merged
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # a file counts as merged when its filelog entry has two
            # non-null parents
            return sum(1 for n in parents if n != nullid) >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
545
542
546
543
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks `revs` (ancestors first), propagating a {dst: (rev, src)} mapping
    from each revision to its children, and returns the {dst: src} copies
    relevant for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy data recorded against the parent we came from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                # copy-on-write: never mutate the mapping shared with other
                # branches
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier rename if one exists
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # record the deletion with a None source
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # keep only live copies (source still known) for the target revision
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
615
612
616
613
617 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
618 """version of `_merge_copies_dict` that works with the Google
615 """version of `_merge_copies_dict` that works with the Google
619 specific "extra" based storage for copy information"""
616 specific "extra" based storage for copy information"""
620 for dest, value in major.items():
617 for dest, value in major.items():
621 other = minor.get(dest)
618 other = minor.get(dest)
622 if other is None:
619 if other is None:
623 minor[dest] = value
620 minor[dest] = value
624 else:
621 else:
625 new_tt = value[0]
622 new_tt = value[0]
626 other_tt = other[0]
623 other_tt = other[0]
627 if value[1] == other[1]:
624 if value[1] == other[1]:
628 continue
625 continue
629 # content from "major" wins, unless it is older
626 # content from "major" wins, unless it is older
630 # than the branch point or there is a merge
627 # than the branch point or there is a merge
631 if (
628 if (
632 new_tt == other_tt
629 new_tt == other_tt
633 or not isancestor(new_tt, other_tt)
630 or not isancestor(new_tt, other_tt)
634 or ismerged(dest)
631 or ismerged(dest)
635 ):
632 ):
636 minor[dest] = value
633 minor[dest] = value
637
634
638
635
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # a committed revision carries all its copy data itself
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: gather copies up to its first parent, then
    # complete them with what the dirstate records
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
653
650
654
651
def _backwardrenames(a, b, match):
    """find renames from b back to a as a {src@a: dst@b} mapping"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # keep only renames: a plain copy leaves the source present in a
        if src in a:
            continue
        reversed_renames[src] = dst
    return reversed_renames
675
672
676
673
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )

    # trivial comparisons cannot carry copies
    if x == y or not x or not y:
        return {}

    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)

    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: a plain forward walk suffices
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: walk backward instead
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated: go back to the common ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if a.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
713
710
714
711
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a 3-tuple (branch_copies1, branch_copies2, diverge) where:

    "branch_copies1" and "branch_copies2" are instances of branch_copies,
    one for each side of the merge.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # the dirstate already knows the copies made in the working copy
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
783
780
784
781
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working copy: reason about its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < limit
803
800
804
801
805 def _checksinglesidecopies(
802 def _checksinglesidecopies(
806 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
807 ):
804 ):
808 if src not in m2:
805 if src not in m2:
809 # deleted on side 2
806 # deleted on side 2
810 if src not in m1:
807 if src not in m1:
811 # renamed on side 1, deleted on side 2
808 # renamed on side 1, deleted on side 2
812 renamedelete[src] = dsts1
809 renamedelete[src] = dsts1
813 elif src not in mb:
810 elif src not in mb:
814 # Work around the "short-circuit to avoid issues with merge states"
811 # Work around the "short-circuit to avoid issues with merge states"
815 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
816 # destination doesn't exist in y.
813 # destination doesn't exist in y.
817 pass
814 pass
818 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
819 return
816 return
820 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
821 # modified on side 2
818 # modified on side 2
822 for dst in dsts1:
819 for dst in dsts1:
823 copy[dst] = src
820 copy[dst] = src
824
821
825
822
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every mapping defaults to a fresh, independently mutable dict
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        template = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return template % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
860
857
861
858
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to reconcile
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dsts]} to reason per copy source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # the cache holds (u1, u2) lazily: files added on only one side
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
1000
997
1001
998
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
        to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps source directory
    prefixes (with a trailing '/') to destination prefixes, movewithdir maps
    individual added files to the path they should move to.
    """
    # Map each candidate source directory to its destination; directories
    # proven uninteresting are collected in 'rejected'.
    rejected = set()
    dirmove = {}

    # A directory move is suspected when every copied file from a directory
    # ended up in the same new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        src_dir = pathutil.dirname(src)
        dst_dir = pathutil.dirname(dst)
        if src_dir in rejected:
            # already known to be uninteresting
            continue
        if ctx.hasdir(src_dir) and ctx.hasdir(dst_dir):
            # some files stayed behind: the directory wasn't fully moved
            rejected.add(src_dir)
        elif dirmove.get(src_dir, dst_dir) != dst_dir:
            # files from the same directory scattered to different places
            rejected.add(src_dir)
        else:
            # looks good so far
            dirmove[src_dir] = dst_dir

    for bad in rejected:
        dirmove.pop(bad, None)
    del rejected

    if not dirmove:
        return {}, {}

    # Normalize to prefix form so a simple startswith() matches whole
    # path components below.
    dirmove = {
        src_dir + b"/": dst_dir + b"/"
        for src_dir, dst_dir in pycompat.iteritems(dirmove)
    }

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # Files added on the other side inside a moved directory should follow
    # the move, unless an explicit copy already accounts for the target.
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1065
1062
1066
1063
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # rev() is None for a working context; operate on its first parent
    # instead (presumably these start out as workingctx - TODO confirm).
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Collect every file touched between base and c2, bailing out to the
    # full algorithm as soon as a merge commit shows up on that path.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Copies on the c2 side come from real copy records, not heuristics;
    # keep only those whose source still exists in c1's manifest.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index files added on the c1 side by basename and by dirname so each
        # missing file only inspects plausible rename targets.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (the limit is loop-invariant: read it once, not once per file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            movecandidates = (
                basenametofilename[basename] + dirnametofilename[dirname]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1174
1171
1175
1172
1176 def _related(f1, f2):
1173 def _related(f1, f2):
1177 """return True if f1 and f2 filectx have a common ancestor
1174 """return True if f1 and f2 filectx have a common ancestor
1178
1175
1179 Walk back to common ancestor to see if the two files originate
1176 Walk back to common ancestor to see if the two files originate
1180 from the same file. Since workingfilectx's rev() is None it messes
1177 from the same file. Since workingfilectx's rev() is None it messes
1181 up the integer comparison logic, hence the pre-step check for
1178 up the integer comparison logic, hence the pre-step check for
1182 None (f1 and f2 can only be workingfilectx's initially).
1179 None (f1 and f2 can only be workingfilectx's initially).
1183 """
1180 """
1184
1181
1185 if f1 == f2:
1182 if f1 == f2:
1186 return True # a match
1183 return True # a match
1187
1184
1188 g1, g2 = f1.ancestors(), f2.ancestors()
1185 g1, g2 = f1.ancestors(), f2.ancestors()
1189 try:
1186 try:
1190 f1r, f2r = f1.linkrev(), f2.linkrev()
1187 f1r, f2r = f1.linkrev(), f2.linkrev()
1191
1188
1192 if f1r is None:
1189 if f1r is None:
1193 f1 = next(g1)
1190 f1 = next(g1)
1194 if f2r is None:
1191 if f2r is None:
1195 f2 = next(g2)
1192 f2 = next(g2)
1196
1193
1197 while True:
1194 while True:
1198 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 f1r, f2r = f1.linkrev(), f2.linkrev()
1199 if f1r > f2r:
1196 if f1r > f2r:
1200 f1 = next(g1)
1197 f1 = next(g1)
1201 elif f2r > f1r:
1198 elif f2r > f1r:
1202 f2 = next(g2)
1199 f2 = next(g2)
1203 else: # f1 and f2 point to files in the same linkrev
1200 else: # f1 and f2 point to files in the same linkrev
1204 return f1 == f2 # true if they point to the same file
1201 return f1 == f2 # true if they point to the same file
1205 except StopIteration:
1202 except StopIteration:
1206 return False
1203 return False
1207
1204
1208
1205
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # find copies between base and ctx, then drop the ones that don't make
    # sense relative to the working copy's first parent
    found = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, found)
    # record each surviving copy directly in the working context
    for dst, src in pycompat.iteritems(found):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now