##// END OF EJS Templates
copies: fix an incorrect comment in graftcopies() from recent D9802...
Martin von Zweigbergk -
r47139:892eb7c5 default
parent child Browse files
Show More
@@ -1,1238 +1,1241 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
# Optional Rust fast path for copy tracing; this is None when the Rust
# extension is unavailable, in which case the pure-Python code paths below
# (guarded by `if rustmod is not None`) are used instead.
rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # start from the older copies and overlay the newer ones; when a newer
    # copy's source is itself a copy destination in 'prefix', follow it back
    # to the original source
    chained = prefix.copy()
    chained.update(
        (dest, prefix.get(src, src))
        for dest, src in pycompat.iteritems(suffix)
    )
    return chained
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies the changeset-centric algorithm
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    configured = repo.ui.config(b'experimental', b'copies.read-from')
    return configured in (b'changeset-only', b'compatibility')
122
122
123
123
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {destination: source} mapping of copies from ``a`` to ``b``.
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    # verbose copy tracing requires both --debug and the devel config knob
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel override: consider every file in `b`, not only the ones
        # missing from `a`
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # compute the ancestry context once and share it across all the file
    # contexts traced below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
189
189
190
190
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object, or None when the revision carries no
      copy information; with the Rust fast path, the raw sidedata blob is
      returned in place of a ChangingFiles object
    """
    # NOTE(review): `match` is currently unused in this function
    cl = repo.changelog
    # bind the changelog accessors to locals once so the returned closure
    # avoids repeated attribute lookups
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                # the Rust implementation consumes the raw sidedata entry
                # directly, so skip building a ChangingFiles object
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
227
227
228
228
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    memo = {}

    def _is_ancestor(anc, desc):
        # cheap answers first, no caching needed: a higher revision number
        # can never be an ancestor of a lower one, and every revision is an
        # ancestor of itself
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        if key not in memo:
            memo[key] = is_ancestor(anc, desc)
        return memo[key]

    return _is_ancestor
245
245
246
246
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy tracing from ``a`` to ``b``

    Returns a {destination: source} mapping of copies, computed from the
    copy information recorded in the changesets themselves.
    """
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # NOTE(review): this initialization looks unused — `children` is
    # unconditionally rebuilt in the non-sidedata branch below
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the parent.
        # so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        # swap the head (b itself, asserted below) for the root revisions and
        # restore sorted order
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
346
346
347
347
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole graph walk to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (rev-where-recorded, source-or-None)}} for revisions
        # whose data is still needed by at least one unprocessed child
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent not relevant to the copy tracing (see
                    # `children_count` construction in the caller)
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: release the parent's data
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # other children still need `copies`; work on a copy
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # transitive copy: point back at the older
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the removal
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
451
451
452
452
# constants returned by _compare_values() to tell _merge_copies_dict() which
# of the two candidate values to keep
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2  # the two values are equivalent; either side may be kept
457
457
458
458
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `changes`: the ChangingFiles information for the current revision (may
      be None); forwarded to _compare_values() to arbitrate conflicts.

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            # no conflict: simply adopt the value from "major"
            minor[dest] = value
        else:
            pick = _compare_values(changes, isancestor, dest, other, value)
            if pick == PICK_MAJOR:
                minor[dest] = value
    return minor
481
481
482
482
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    Each value is a `(rev, source)` pair, where `source` is None when the
    file was removed in `rev` (see _combine_changeset_copies). Returns one
    of PICK_MINOR, PICK_MAJOR or PICK_EITHER.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # evacuate some simple case first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        # same source on both sides (or both record a removal): equivalent
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older than
    # the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        # "major" recorded a removal, but the file was salvaged in this
        # revision: keep the surviving copy information from "minor"
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        # symmetric case: the removal recorded by "minor" was salvaged
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        return PICK_MAJOR
    return PICK_MINOR
510
510
511
511
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # Bind the changectx once so the returned callback can be queried
        # for several paths of the same revision cheaply.
        ctx = repo[rev]

        def ismerged(path):
            # A path untouched by the revision cannot have been merged in it.
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # "merged" means the filelog entry has two non-null parents.
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        # ctx._copies yields the (p1copies, p2copies) pair recorded for
        # this changeset (here stored in the commit "extra" fields).
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
549
549
550
550
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    Walks `revs` (which must be ordered parents-before-children: a rev
    with no accumulated state is treated as a root) and propagates a
    {dest: (rev, source)} dict down to each child, merging when a merge
    commit is reached from its second parent.  Returns the plain
    {dest: source} mapping accumulated at `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy information recorded against the parent we are
            # coming from
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain renames: if the source was itself copied from an
                    # older file, record the oldest source
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a (rev, None) entry is a deletion marker; it is dropped
                    # when building `final_copies` below
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
619
619
620
620
621 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
621 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
622 """version of `_merge_copies_dict` that works with the Google
622 """version of `_merge_copies_dict` that works with the Google
623 specific "extra" based storage for copy information"""
623 specific "extra" based storage for copy information"""
624 for dest, value in major.items():
624 for dest, value in major.items():
625 other = minor.get(dest)
625 other = minor.get(dest)
626 if other is None:
626 if other is None:
627 minor[dest] = value
627 minor[dest] = value
628 else:
628 else:
629 new_tt = value[0]
629 new_tt = value[0]
630 other_tt = other[0]
630 other_tt = other[0]
631 if value[1] == other[1]:
631 if value[1] == other[1]:
632 continue
632 continue
633 # content from "major" wins, unless it is older
633 # content from "major" wins, unless it is older
634 # than the branch point or there is a merge
634 # than the branch point or there is a merge
635 if (
635 if (
636 new_tt == other_tt
636 new_tt == other_tt
637 or not isancestor(new_tt, other_tt)
637 or not isancestor(new_tt, other_tt)
638 or ismerged(dest)
638 or ismerged(dest)
639 ):
639 ):
640 minor[dest] = value
640 minor[dest] = value
641
641
642
642
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # `b` is the working copy: track copies up to its first parent,
        # then layer the uncommitted dirstate copies on top.
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
657
657
658
658
def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though copies are dropped below, 1:n rename situations can still
    # exist (e.g. hg cp a b; hg mv a c); sorting before inverting makes the
    # surviving pick deterministic.  The forward search is run unfiltered
    # because "match" must apply to the *source* side of the reversed
    # mapping, which is the destination side of the forward one.
    forward = _forwardcopies(b, a)
    reversed_renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        if src in a:
            # source still exists in `a`: this is a copy, not a rename
            continue
        reversed_renames[src] = dst
    return reversed_renames
679
679
680
680
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches on the relationship between ``x`` and ``y``: forward
    tracing when ``x`` is an ancestor of ``y``, backward when ``y`` is an
    ancestor of ``x``, and a backward+forward combination through their
    common ancestor otherwise.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    # trivial cases: identical contexts or an empty endpoint
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        # NOTE(review): when a real common ancestor exists, `x` is passed as
        # the base for the forward leg — presumably so copy sources present
        # in `x` but not in `a` are still chained; confirm against
        # _committedforwardcopies before changing.
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop copies invalidated by the chaining (see _filter)
    _filter(x, y, copies)
    return copies
717
717
718
718
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        # default: exhaustive (and slower) full copytracing
        return _fullcopytracing(repo, c1, c2, base)
787
787
788
788
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytrace algorithm is affordable here.

    Returns True when base and source are both non-public (mutable) and
    the number of changesets between them stays under the
    `experimental.copytrace.sourcecommitlimit` config knob, in which case
    full copytracing is fast enough and more capable than heuristics.
    """
    if c1.rev() is None:
        # working directory: reason about its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nb_commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nb_commits < sourcecommitlimit
807
807
808
808
809 def _checksinglesidecopies(
809 def _checksinglesidecopies(
810 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
810 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
811 ):
811 ):
812 if src not in m2:
812 if src not in m2:
813 # deleted on side 2
813 # deleted on side 2
814 if src not in m1:
814 if src not in m1:
815 # renamed on side 1, deleted on side 2
815 # renamed on side 1, deleted on side 2
816 renamedelete[src] = dsts1
816 renamedelete[src] = dsts1
817 elif src not in mb:
817 elif src not in mb:
818 # Work around the "short-circuit to avoid issues with merge states"
818 # Work around the "short-circuit to avoid issues with merge states"
819 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
819 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
820 # destination doesn't exist in y.
820 # destination doesn't exist in y.
821 pass
821 pass
822 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
822 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
823 return
823 return
824 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
824 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
825 # modified on side 2
825 # modified on side 2
826 for dst in dsts1:
826 for dst in dsts1:
827 copy[dst] = src
827 copy[dst] = src
828
828
829
829
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # Substitute fresh dicts only for missing arguments so a caller's
        # dict (even an empty one) stays shared with this instance.
        if copy is None:
            copy = {}
        if renamedelete is None:
            renamedelete = {}
        if dirmove is None:
            dirmove = {}
        if movewithdir is None:
            movewithdir = {}
        self.copy = copy
        self.renamedelete = renamedelete
        self.dirmove = dirmove
        self.movewithdir = movewithdir

    def __repr__(self):
        fmt = (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n'
            ' movewithdir=%r\n>'
        )
        return fmt % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
864
864
865
865
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a (branch_copies, branch_copies, diverge) tuple: per-side copy
    information plus the divergent-rename mapping.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert the {dst: src} maps so each source lists all its destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    # classify every copy source: both-sides, side-1 only, or side-2 only
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # computed lazily (and at most once, via the shared `cache` list) since
    # they are only needed for debug output and directory-rename detection
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # directory moves on one side pull along files added on the other side
    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
1004
1004
1005
1005
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # Build a source-directory -> destination-directory map from the file
    # copies. A source directory goes into `ruledout` as soon as we learn it
    # cannot represent a whole-directory move.
    ruledout = set()
    dirmove = {}

    # A directory move is suspected when every copied file of a directory
    # landed in one common new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in ruledout:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            ruledout.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # files from the same directory moved to two different places
            ruledout.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    for d in ruledout:
        dirmove.pop(d, None)
    del ruledout

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix checks below are exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # check unaccounted nonoverlapping files against directory moves
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n"
                    % (f, df)
                )
            break

    return dirmove, movewithdir
1069
1069
1070
1070
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working-copy contexts have no revision number; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # A file is "missing" if it isn't present in the destination, but is
    # present in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files added on the c1 side by basename and by directory
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics;
        # config value to limit the number of candidate moves to check.
        # Loop-invariant, so read it once instead of per missing file.
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1178
1178
1179
1179
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # trivially the same file

    anc1 = f1.ancestors()
    anc2 = f2.ancestors()
    try:
        # workingfilectx has a linkrev of None; step past it before the
        # integer comparisons below
        if f1.linkrev() is None:
            f1 = next(anc1)
        if f2.linkrev() is None:
            f2 = next(anc2)

        # walk the higher-linkrev side backwards until the linkrevs meet
        while True:
            rev1 = f1.linkrev()
            rev2 = f2.linkrev()
            if rev1 == rev2:
                # both point into the same linkrev: related iff they are
                # the very same file
                return f1 == f2
            if rev1 > rev2:
                f1 = next(anc1)
            else:
                f2 = next(anc2)
    except StopIteration:
        # one ancestor chain ran out: no common origin
        return False
1211
1211
1212
1212
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    preexisting = [dst for dst in new_copies if dst in parent]
    for dst in preexisting:
        del new_copies[dst]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now