##// END OF EJS Templates
copies: rename value/other variable to minor/major for clarity...
marmoute -
r46777:6b9d6529 default
parent child Browse files
Show More
@@ -1,1224 +1,1224 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
73 def _chain(prefix, suffix):
73 def _chain(prefix, suffix):
74 """chain two sets of copies 'prefix' and 'suffix'"""
74 """chain two sets of copies 'prefix' and 'suffix'"""
75 result = prefix.copy()
75 result = prefix.copy()
76 for key, value in pycompat.iteritems(suffix):
76 for key, value in pycompat.iteritems(suffix):
77 result[key] = prefix.get(value, value)
77 result[key] = prefix.get(value, value)
78 return result
78 return result
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # side-data storage always implies changeset-centric copy tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
123
123
124
124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dest: source} mapping of copies between changectx ``a`` and
    changectx ``b``.  ``base`` (possibly None) supplies an extra manifest
    that file tracing may also stop at; ``match`` limits the files
    considered.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric storage has its own, cheaper algorithm
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # reuse the same ancestry object for every file traced below (it is
    # attached to each fctx before _tracefile walks its ancestors)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm
182
182
183
183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or raw side-data bytes when the Rust
      extension is in use), None when the revision carries no copy info

    Note: ``match`` is unused in this body — presumably kept for interface
    parity with _revinfo_getter_extra; confirm before removing.
    """
    # bind frequently used changelog methods to locals for the closures below
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust implementation consumes the raw side-data blob directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:
        # pure-Python path: hand back the parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
220
220
221
221
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # cheap answers that never need the cache: a revision numbered
        # higher than its "descendant" cannot be its ancestor, and every
        # revision is its own ancestor
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        try:
            return cache[key]
        except KeyError:
            ret = cache[key] = is_ancestor(anc, desc)
            return ret

    return _is_ancestor
238
238
239
239
def _changesetforwardcopies(a, b, match):
    """changeset-centric computation of forward copies

    Returns a {dest: source} mapping of copies between changectx `a` and
    changectx `b` (empty dict when nothing can be tracked).
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case.

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data for
        # the parents. To know when the parents' data is no longer necessary,
        # we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        # `revs` ends with b.rev() itself; replace it with the roots so the
        # sorted list covers every "parent" side of the edges
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339
339
340
340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`, as a
    {dest: source} mapping.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # all_copies maps still-needed revisions to their (in-progress)
        # {dest: (rev, source)} copy information
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # edge not relevant for copy tracing (see the caller's
                    # note on `children_count`)
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    # other children still need the parent data: read-only
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child: the parent data can be released
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    copies = {}

                newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # collapse chained renames to the original
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        if newcopies is copies:
                            newcopies = copies.copy()
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # record the deletion with a None source
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                elif current_copies is newcopies:
                    # nothing to merge:
                    pass
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
447
447
448
448
449 # constant to decide which side to pick with _merge_copies_dict
449 # constant to decide which side to pick with _merge_copies_dict
450 PICK_MINOR = 0
450 PICK_MINOR = 0
451 PICK_MAJOR = 1
451 PICK_MAJOR = 1
452 PICK_EITHER = 2
452 PICK_EITHER = 2
453
453
454
454
455 def _merge_copies_dict(minor, major, isancestor, changes):
455 def _merge_copies_dict(minor, major, isancestor, changes):
456 """merge two copies-mapping together, minor and major
456 """merge two copies-mapping together, minor and major
457
457
458 In case of conflict, value from "major" will be picked.
458 In case of conflict, value from "major" will be picked.
459
459
460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
460 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
461 ancestors of `high_rev`,
461 ancestors of `high_rev`,
462
462
463 - `ismerged(path)`: callable return True if `path` have been merged in the
463 - `ismerged(path)`: callable return True if `path` have been merged in the
464 current revision,
464 current revision,
465
465
466 return the resulting dict (in practice, the "minor" object, updated)
466 return the resulting dict (in practice, the "minor" object, updated)
467 """
467 """
468 for dest, value in major.items():
468 for dest, value in major.items():
469 other = minor.get(dest)
469 other = minor.get(dest)
470 if other is None:
470 if other is None:
471 minor[dest] = value
471 minor[dest] = value
472 else:
472 else:
473 pick = _compare_values(changes, isancestor, dest, other, value)
473 pick = _compare_values(changes, isancestor, dest, other, value)
474 if pick == PICK_MAJOR:
474 if pick == PICK_MAJOR:
475 minor[dest] = value
475 minor[dest] = value
476 return minor
476 return minor
477
477
478
478
479 def _compare_values(changes, isancestor, dest, other, value):
479 def _compare_values(changes, isancestor, dest, minor, major):
480 """compare two value within a _merge_copies_dict loop iteration"""
480 """compare two value within a _merge_copies_dict loop iteration"""
481 new_tt = value[0]
481 major_tt = major[0]
482 other_tt = other[0]
482 minor_tt = minor[0]
483
483
484 if value[1] == other[1]:
484 if major[1] == minor[1]:
485 return PICK_EITHER
485 return PICK_EITHER
486 # content from "major" wins, unless it is older
486 # content from "major" wins, unless it is older
487 # than the branch point or there is a merge
487 # than the branch point or there is a merge
488 if new_tt == other_tt:
488 if major_tt == minor_tt:
489 return PICK_MAJOR
489 return PICK_MAJOR
490 elif changes is not None and value[1] is None and dest in changes.salvaged:
490 elif changes is not None and major[1] is None and dest in changes.salvaged:
491 return PICK_MINOR
491 return PICK_MINOR
492 elif changes is not None and other[1] is None and dest in changes.salvaged:
492 elif changes is not None and minor[1] is None and dest in changes.salvaged:
493 return PICK_MAJOR
493 return PICK_MAJOR
494 elif changes is not None and dest in changes.merged:
494 elif changes is not None and dest in changes.merged:
495 return PICK_MAJOR
495 return PICK_MAJOR
496 elif not isancestor(new_tt, other_tt):
496 elif not isancestor(major_tt, minor_tt):
497 if value[1] is not None:
497 if major[1] is not None:
498 return PICK_MAJOR
498 return PICK_MAJOR
499 elif isancestor(other_tt, new_tt):
499 elif isancestor(minor_tt, major_tt):
500 return PICK_MAJOR
500 return PICK_MAJOR
501 return PICK_MINOR
501 return PICK_MINOR
502
502
503
503
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build the per-revision `ismerged(path)` callback
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # a filelog revision with two non-null parents is a merge
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        # copies are read from the changeset "extra" based storage here
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
541
541
542
542
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        # copy information accumulated from the walk roots down to `r`
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # collapse a chained copy onto its ultimate source
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # drop entries whose source was erased by a removal along the way
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
611
611
612
612
613 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
613 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
614 """version of `_merge_copies_dict` that works with the Google
614 """version of `_merge_copies_dict` that works with the Google
615 specific "extra" based storage for copy information"""
615 specific "extra" based storage for copy information"""
616 for dest, value in major.items():
616 for dest, value in major.items():
617 other = minor.get(dest)
617 other = minor.get(dest)
618 if other is None:
618 if other is None:
619 minor[dest] = value
619 minor[dest] = value
620 else:
620 else:
621 new_tt = value[0]
621 new_tt = value[0]
622 other_tt = other[0]
622 other_tt = other[0]
623 if value[1] == other[1]:
623 if value[1] == other[1]:
624 continue
624 continue
625 # content from "major" wins, unless it is older
625 # content from "major" wins, unless it is older
626 # than the branch point or there is a merge
626 # than the branch point or there is a merge
627 if (
627 if (
628 new_tt == other_tt
628 new_tt == other_tt
629 or not isancestor(new_tt, other_tt)
629 or not isancestor(new_tt, other_tt)
630 or ismerged(dest)
630 or ismerged(dest)
631 ):
631 ):
632 minor[dest] = value
632 minor[dest] = value
633
633
634
634
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # working copy: compute committed copies up to b's first parent, then
    # layer the dirstate copies on top of them
    if b.rev() is None:
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
649
649
650
650
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping by reversing forward copies"""
    # copy tracing disabled entirely: report nothing
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # skip pure copies (the source still exists in a); only keep renames
        if src in a:
            continue
        reversed_renames[src] = dst
    return reversed_renames
671
671
672
672
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # degenerate comparisons carry no copy information
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    common = y.ancestor(x)
    if common == x:
        # x is an ancestor of y: only need the forward direction
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        result = _forwardcopies(x, y, match=match)
    elif common == y:
        # y is an ancestor of x: only need the backward direction
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        result = _backwardrenames(x, y, match=match)
    else:
        # unrelated heads: walk back to the common ancestor, then forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if common.rev() != nullrev:
            base = x
        result = _chain(
            _backwardrenames(x, common, match=match),
            _forwardcopies(common, y, base, match=match),
        )
    _filter(x, y, result)
    return result
709
709
710
710
def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

     o          ---> 4 another commit
     |
     |   o      ---> 3 commit that modifies a.txt
     |  /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
779
779
780
780
def _isfullcopytraceable(repo, c1, base):
    """Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    # public (immutable) revisions on either end: assume the range is too
    # large for full copytracing
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
799
799
800
800
801 def _checksinglesidecopies(
801 def _checksinglesidecopies(
802 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
802 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
803 ):
803 ):
804 if src not in m2:
804 if src not in m2:
805 # deleted on side 2
805 # deleted on side 2
806 if src not in m1:
806 if src not in m1:
807 # renamed on side 1, deleted on side 2
807 # renamed on side 1, deleted on side 2
808 renamedelete[src] = dsts1
808 renamedelete[src] = dsts1
809 elif src not in mb:
809 elif src not in mb:
810 # Work around the "short-circuit to avoid issues with merge states"
810 # Work around the "short-circuit to avoid issues with merge states"
811 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
811 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
812 # destination doesn't exist in y.
812 # destination doesn't exist in y.
813 pass
813 pass
814 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
814 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
815 return
815 return
816 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
816 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
817 # modified on side 2
817 # modified on side 2
818 for dst in dsts1:
818 for dst in dsts1:
819 copy[dst] = src
819 copy[dst] = src
820
820
821
821
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every attribute defaults to a fresh empty dict (never a shared one)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
856
856
857
857
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # no copies on either side: nothing to analyse
    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert dst->src into src->[dsts] so divergence can be detected
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests, computed lazily and at most
    # once (the computation is expensive and only needed for dir-rename
    # detection and debug output)
    added_cache = []

    def _get_addedfiles(idx):
        if not added_cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            added_cache.extend((u1, u2))
        return added_cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b" unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, sidecopies in ((b"local", copies1), (b"remote", copies2)):
            if not sidecopies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(sidecopies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (sidecopies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
996
996
997
997
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # Build a source-dir -> dest-dir map from the observed file copies; a
    # source directory is ruled out as soon as the evidence contradicts a
    # wholesale move.
    ruled_out = set()
    dirmove = {}

    # A directory move is suspected when every file copy out of a directory
    # points into one single other directory.
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in ruled_out:
            # already known to be uninteresting
            continue
        if ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # the source directory still exists: not entirely moved
            ruled_out.add(dsrc)
        elif dirmove.get(dsrc, ddst) != ddst:
            # files from one directory scattered to different destinations
            ruled_out.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # Drop the ruled-out entries and normalize both sides to directory
    # prefixes (trailing slash) in a single pass.
    dirmove = {
        src + b"/": dst + b"/"
        for src, dst in pycompat.iteritems(dirmove)
        if src not in ruled_out
    }
    del ruled_out

    if not dirmove:
        return {}, {}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved: move it along
            df = dirmove[d] + f[len(d):]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1061
1061
1062
1062
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # Working-directory contexts have no revision number (rev() is None);
    # operate on their first parent instead.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Collect every file touched between base and c2, bailing out to the
    # full algorithm on the first merge commit encountered.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Copies made from base to c2 whose source file still exists in c1.
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # Index files added on the c1 side by basename and dirname so that
        # rename candidates for each missing file can be looked up cheaply.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once: the value is invariant across the loop below)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1170
1170
1171
1171
1172 def _related(f1, f2):
1172 def _related(f1, f2):
1173 """return True if f1 and f2 filectx have a common ancestor
1173 """return True if f1 and f2 filectx have a common ancestor
1174
1174
1175 Walk back to common ancestor to see if the two files originate
1175 Walk back to common ancestor to see if the two files originate
1176 from the same file. Since workingfilectx's rev() is None it messes
1176 from the same file. Since workingfilectx's rev() is None it messes
1177 up the integer comparison logic, hence the pre-step check for
1177 up the integer comparison logic, hence the pre-step check for
1178 None (f1 and f2 can only be workingfilectx's initially).
1178 None (f1 and f2 can only be workingfilectx's initially).
1179 """
1179 """
1180
1180
1181 if f1 == f2:
1181 if f1 == f2:
1182 return True # a match
1182 return True # a match
1183
1183
1184 g1, g2 = f1.ancestors(), f2.ancestors()
1184 g1, g2 = f1.ancestors(), f2.ancestors()
1185 try:
1185 try:
1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1187
1187
1188 if f1r is None:
1188 if f1r is None:
1189 f1 = next(g1)
1189 f1 = next(g1)
1190 if f2r is None:
1190 if f2r is None:
1191 f2 = next(g2)
1191 f2 = next(g2)
1192
1192
1193 while True:
1193 while True:
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 if f1r > f2r:
1195 if f1r > f2r:
1196 f1 = next(g1)
1196 f1 = next(g1)
1197 elif f2r > f1r:
1197 elif f2r > f1r:
1198 f2 = next(g2)
1198 f2 = next(g2)
1199 else: # f1 and f2 point to files in the same linkrev
1199 else: # f1 and f2 point to files in the same linkrev
1200 return f1 == f2 # true if they point to the same file
1200 return f1 == f2 # true if they point to the same file
1201 except StopIteration:
1201 except StopIteration:
1202 return False
1202 return False
1203
1203
1204
1204
def graftcopies(wctx, ctx, base):
    """Apply the copies made between base and ctx onto the working copy wctx.

    Only copies from base to ctx are considered; copies between base and
    wctx are deliberately ignored. In contrast with mergecopies(), the
    copy information is recorded directly in the working copy (via
    markcopied()) instead of merely being returned, which keeps this
    cheap — notably in the common base == ctx.p1() case — and usable
    even with experimental.copytrace=off.

    Most copies will already have been recorded by merge.update(), but
    it skips files it does not consider related (see merge._related())
    or that were unmodified on the local side; the records added here
    fill in those gaps.
    """
    found_copies = pathcopies(base, ctx)
    # drop copies that are no longer valid relative to the working copy's
    # first parent
    _filter(wctx.p1(), wctx, found_copies)
    for dest, source in pycompat.iteritems(found_copies):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now