##// END OF EJS Templates
copies: deal with the "same revision" special case earlier...
marmoute -
r46799:ee63c117 default
parent child Browse files
Show More
@@ -1,1224 +1,1228
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 2
44 # in the following table (not including trivial cases). For example, case 2
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 if v not in src:
63 if v not in src:
64 del t[k]
64 del t[k]
65 # remove criss-crossed copies
65 # remove criss-crossed copies
66 elif k in src and v in dst:
66 elif k in src and v in dst:
67 del t[k]
67 del t[k]
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 elif k not in dst:
69 elif k not in dst:
70 del t[k]
70 del t[k]
71
71
72
72
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, src in pycompat.iteritems(suffix):
        # a copy recorded in `suffix` may continue a rename started in
        # `prefix`; if so, resolve it back to the original source
        chained[dest] = prefix.get(src, src)
    return chained
79
79
80
80
81 def _tracefile(fctx, am, basemf):
81 def _tracefile(fctx, am, basemf):
82 """return file context that is the ancestor of fctx present in ancestor
82 """return file context that is the ancestor of fctx present in ancestor
83 manifest am
83 manifest am
84
84
85 Note: we used to try and stop after a given limit, however checking if that
85 Note: we used to try and stop after a given limit, however checking if that
86 limit is reached turned out to be very expensive. we are better off
86 limit is reached turned out to be very expensive. we are better off
87 disabling that feature."""
87 disabling that feature."""
88
88
89 for f in fctx.ancestors():
89 for f in fctx.ancestors():
90 path = f.path()
90 path = f.path()
91 if am.get(path, None) == f.filenode():
91 if am.get(path, None) == f.filenode():
92 return path
92 return path
93 if basemf and basemf.get(path, None) == f.filenode():
93 if basemf and basemf.get(path, None) == f.filenode():
94 return path
94 return path
95
95
96
96
97 def _dirstatecopies(repo, match=None):
97 def _dirstatecopies(repo, match=None):
98 ds = repo.dirstate
98 ds = repo.dirstate
99 c = ds.copies().copy()
99 c = ds.copies().copy()
100 for k in list(c):
100 for k in list(c):
101 if ds[k] not in b'anm' or (match and not match(k)):
101 if ds[k] not in b'anm' or (match and not match(k)):
102 del c[k]
102 del c[k]
103 return c
103 return c
104
104
105
105
106 def _computeforwardmissing(a, b, match=None):
106 def _computeforwardmissing(a, b, match=None):
107 """Computes which files are in b but not a.
107 """Computes which files are in b but not a.
108 This is its own function so extensions can easily wrap this call to see what
108 This is its own function so extensions can easily wrap this call to see what
109 files _forwardcopies is about to process.
109 files _forwardcopies is about to process.
110 """
110 """
111 ma = a.manifest()
111 ma = a.manifest()
112 mb = b.manifest()
112 mb = b.manifest()
113 return mb.filesnotin(ma, match=match)
113 return mb.filesnotin(ma, match=match)
114
114
115
115
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # side-data storage always implies changeset-centric tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
123
123
124
124
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies_map = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for filename in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % filename)
        fctx = b[filename]
        # share the ancestry context so repeated traces reuse the same walk
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        origin = _tracefile(fctx, am, basemf)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            copies_map[filename] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies_map
182
182
183
183
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust fast path consumes the raw side-data blob directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:
        # pure-Python path works with the parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
220
220
221
221
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # an ancestor always has a smaller revision number, so these two
        # cheap checks avoid both the cache and the expensive call
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
238
238
239
239
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent -> child
    # edges that are contained in `::B` but not in `::A`.
    #
    # To do so, we need to gather all revisions exclusive[1] to "B" (i.e.
    # `::b - ::a`) and also all the "root points", i.e. the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent -> child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, i.e.
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case.

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graph roots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
339
339
340
340
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent -> child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of that parent: its data can be released
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    copies = {}

                newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        if newcopies is copies:
                            newcopies = copies.copy()
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                elif current_copies is newcopies:
                    # nothing to merge:
                    pass
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies, current_copies, isancestor, changes
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
447
447
448
448
# constants to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0  # keep the value already present in the "minor" mapping
PICK_MAJOR = 1  # take the value from the "major" mapping
PICK_EITHER = 2  # both values are equivalent; either one is fine
453
453
454
454
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
      ancestors of `high_rev`,

    - `ismerged(path)`: callable return True if `path` have been merged in the
      current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no conflict: simply adopt the major entry
            minor[dest] = major_value
            continue
        choice = _compare_values(changes, isancestor, dest, minor_value, major_value)
        if choice == PICK_MAJOR:
            minor[dest] = major_value
    return minor
477
477
478
478
def _compare_values(changes, isancestor, dest, minor, major):
    """compare two value within a _merge_copies_dict loop iteration

    Returns one of PICK_MINOR, PICK_MAJOR or PICK_EITHER.
    """
    major_tt, major_value = major
    minor_tt, minor_value = minor

    # evacuate some simple case first:
    if major_tt == minor_tt:
        # if it comes from the same revision it must be the same value
        assert major_value == minor_value
        return PICK_EITHER
    elif major[1] == minor[1]:
        return PICK_EITHER

    # actual merging needed: content from "major" wins, unless it is older than
    # the branch point or there is a merge
    elif changes is not None and major[1] is None and dest in changes.salvaged:
        return PICK_MINOR
    elif changes is not None and minor[1] is None and dest in changes.salvaged:
        return PICK_MAJOR
    elif changes is not None and dest in changes.merged:
        return PICK_MAJOR
    elif not isancestor(major_tt, minor_tt):
        if major[1] is not None:
            return PICK_MAJOR
    elif isancestor(minor_tt, major_tt):
        return PICK_MAJOR
    return PICK_MINOR
502
506
503
507
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            filelog_parents = fctx._filelog.parents(fctx._filenode)
            # merged means the filelog entry has two real (non-null) parents
            real_parents = sum(1 for n in filelog_parents if n != nullid)
            return real_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
541
545
542
546
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    # mapping of: revision -> {dest: (introducing_rev, source-or-None)}
    # entries are popped as soon as the revision has been processed as a
    # parent (each rev in `revs` is visited exactly once).
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            # pick the copy data recorded against the parent (`r`) we are
            # currently walking through; `r` must be one of `c`'s parents
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            # `newcopies` aliases `copies` until a write forces a copy
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through a rename recorded for `source` itself,
                    # unless that chain was invalidated (None source)
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    # merge result lands in `othercopies`, already stored
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # drop entries whose copy chain was invalidated by a removal
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
611
615
612
616
def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information

    Merges `major` into `minor` in place; on conflict, `major` wins unless
    it is older than the branch point and the file was not merged.
    """
    for dest, major_entry in major.items():
        minor_entry = minor.get(dest)
        if minor_entry is None:
            # `minor` has no opinion on this file: adopt `major`'s entry
            minor[dest] = major_entry
            continue
        if major_entry[1] == minor_entry[1]:
            # identical copy source on both sides, nothing to arbitrate
            continue
        major_tt = major_entry[0]
        minor_tt = minor_entry[0]
        # content from "major" wins, unless it is older
        # than the branch point or there is a merge
        if (
            major_tt == minor_tt
            or not isancestor(major_tt, minor_tt)
            or ismerged(dest)
        ):
            minor[dest] = major_entry
633
637
634
638
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, base, match)
    # `b` is the working copy: trace committed copies up to its first
    # parent, then layer the dirstate (uncommitted) copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
649
653
650
654
def _backwardrenames(a, b, match):
    """reverse the forward copies between `b` and its ancestor `a`,
    keeping only true renames (entries whose source is gone from `a`)"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        if src in a:
            # still present in `a`: this was a copy, not a rename -- drop it
            continue
        backward[src] = dst
    return backward
671
675
672
676
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        # identical or empty endpoints: no copies can be involved
        return {}
    if y.rev() is None and x == y.p1():
        # short-circuit to avoid issues with merge states
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        return _dirstatecopies(repo, match)
    ancestor = y.ancestor(x)
    if ancestor == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif ancestor == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # endpoints are on different branches: walk back from `x` to the
        # common ancestor, then forward from the ancestor to `y`
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = x if ancestor.rev() != nullrev else None
        copies = _chain(
            _backwardrenames(x, ancestor, match=match),
            _forwardcopies(ancestor, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
709
713
710
714
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        # the dirstate already records the copies for this simple case
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
779
783
780
784
def _isfullcopytraceable(repo, c1, base):
    """Decide whether full copytracing is affordable under 'heuristics' mode.

    Full copytracing is worthwhile when base, source and destination are all
    non-public (mutable) and the number of changesets from base to c1 stays
    below `experimental.copytrace.sourcecommitlimit`; past that limit the
    cheaper heuristic algorithm is used instead.
    """
    if c1.rev() is None:
        # working directory: evaluate against its first parent
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public endpoint can make the range arbitrarily large
        return False
    sourcecommitlimit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
799
803
800
804
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """Record the effect of a copy/rename of `src` seen only on side 1.

    Updates `copy` and `renamedelete` in place; `m1`/`m2`/`mb` are the
    manifests of side 1, side 2 and the merge base respectively.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
        return
    if src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        return
    if mb[src] != m2[src] and not _related(c2[src], base[src]):
        # content diverged in an unrelated way: not a copy we care about
        return
    if mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
820
824
821
825
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # `None` sentinels give each instance a fresh empty dict, avoiding
        # the shared-mutable-default-argument pitfall.
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        template = '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
        fields = (self.copy, self.renamedelete, self.dirmove, self.movewithdir)
        return template % fields
856
860
857
861
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        # no copies on either side: nothing to merge, diverge or rename-delete
        return branch_copies(), branch_copies(), {}

    # invert {dst: src} into {src: [dst, ...]} so each copy source can be
    # classified by what happened to it on both sides
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    # (computed lazily and memoized, since filesnotin() is expensive and the
    # result is only needed for debug output and _dir_renames)
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"   on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"    src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
996
1000
997
1001
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and files dragged along with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared
    to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps a source directory
    (with trailing slash) to its destination directory, and movewithdir maps
    files added on the other side to the path they should move to.
    """
    # Collect candidate directory moves, remembering which source
    # directories have been ruled out.  A directory only counts as moved
    # when every copy out of it targets the same destination directory and
    # the source directory is gone from ctx.
    ruledout = set()
    dirmove = {}

    for dst, src in pycompat.iteritems(fullcopy):
        dsrc = pathutil.dirname(src)
        ddst = pathutil.dirname(dst)
        if dsrc in ruledout:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            ruledout.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            ruledout.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for bad in ruledout:
        dirmove.pop(bad, None)
    del ruledout

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # accidentally match a sibling directory with a common name prefix
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # relocate files newly added (on the other side) inside moved directories
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b" pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1061
1065
1062
1066
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # operate on committed revisions only: a working context has rev() of
    # None, so substitute its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # walk the first-parent chain from c2 back to base, collecting every
    # file touched on the way; any merge commit makes the walk ambiguous
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies made on the c2 side whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        # index files added on the c1 side by basename and by dirname, so
        # each missing file is only compared against plausible rename
        # candidates (heuristics 1 and 2 from the docstring)
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1170
1174
1171
1175
1172 def _related(f1, f2):
1176 def _related(f1, f2):
1173 """return True if f1 and f2 filectx have a common ancestor
1177 """return True if f1 and f2 filectx have a common ancestor
1174
1178
1175 Walk back to common ancestor to see if the two files originate
1179 Walk back to common ancestor to see if the two files originate
1176 from the same file. Since workingfilectx's rev() is None it messes
1180 from the same file. Since workingfilectx's rev() is None it messes
1177 up the integer comparison logic, hence the pre-step check for
1181 up the integer comparison logic, hence the pre-step check for
1178 None (f1 and f2 can only be workingfilectx's initially).
1182 None (f1 and f2 can only be workingfilectx's initially).
1179 """
1183 """
1180
1184
1181 if f1 == f2:
1185 if f1 == f2:
1182 return True # a match
1186 return True # a match
1183
1187
1184 g1, g2 = f1.ancestors(), f2.ancestors()
1188 g1, g2 = f1.ancestors(), f2.ancestors()
1185 try:
1189 try:
1186 f1r, f2r = f1.linkrev(), f2.linkrev()
1190 f1r, f2r = f1.linkrev(), f2.linkrev()
1187
1191
1188 if f1r is None:
1192 if f1r is None:
1189 f1 = next(g1)
1193 f1 = next(g1)
1190 if f2r is None:
1194 if f2r is None:
1191 f2 = next(g2)
1195 f2 = next(g2)
1192
1196
1193 while True:
1197 while True:
1194 f1r, f2r = f1.linkrev(), f2.linkrev()
1198 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 if f1r > f2r:
1199 if f1r > f2r:
1196 f1 = next(g1)
1200 f1 = next(g1)
1197 elif f2r > f1r:
1201 elif f2r > f1r:
1198 f2 = next(g2)
1202 f2 = next(g2)
1199 else: # f1 and f2 point to files in the same linkrev
1203 else: # f1 and f2 point to files in the same linkrev
1200 return f1 == f2 # true if they point to the same file
1204 return f1 == f2 # true if they point to the same file
1201 except StopIteration:
1205 except StopIteration:
1202 return False
1206 return False
1203
1207
1204
1208
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), only copies between base and ctx are considered
    here; copies between base and wctx are ignored.  Also unlike
    mergecopies(), the copies are applied to the working copy (by marking
    destination files as copied) rather than merely reported.  That makes
    this cheaper (especially in the common case of base==ctx.p1()) and
    useful even when experimental.copytrace=off.

    merge.update() will have already marked most copies, but it only marks
    a copy when it thinks the source files are related (see
    merge._related()) and when the file was modified on the local side.
    This function records the copies merge.update() "missed".
    """
    found = pathcopies(base, ctx)
    # drop copies that are invalid when chained onto the working copy's
    # first parent
    _filter(wctx.p1(), wctx, found)
    for dst, src in pycompat.iteritems(found):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now