copies: no longer cache the ChangedFiles during copy tracing...
marmoute
r46765:a132aa59 default
@@ -1,1263 +1,1223 @@
# coding: utf8
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _
from .node import (
    nullid,
    nullrev,
)

from . import (
    match as matchmod,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import (
    flagutil,
    sidedata as sidedatamod,
)

rustmod = policy.importrust("copy_tracing")


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -     -
    #   2    x    y    y    x->y
    #   3    x    y    x     -
    #   4    x    y    z    x->z
    #   5    -    x    y     -
    #   6    x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result


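# A minimal sketch of how _chain() and _filter() cooperate (hypothetical
# mappings, not part of the module). Chaining composes two {dst: src} legs;
# filtering then drops invalid entries such as case 3 from the table above:
#
#     copies_a = {b'mid': b'src'}        # first leg: 'src' renamed to 'mid'
#     copies_b = {b'dst': b'mid'}        # second leg: 'mid' renamed to 'dst'
#     t = _chain(copies_a, copies_b)     # {b'mid': b'src', b'dst': b'src'}
#     # if 'mid' no longer exists in the destination manifest, _filter()
#     # removes the stale b'mid' entry, leaving {b'dst': b'src'}.
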
def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try to stop after a given limit; however, checking if
    that limit was reached turned out to be very expensive, so we are better
    off disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


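# For reference, a configuration sketch (key and values quoted from the
# checks in usechangesetcentricalgo() above; the hgrc stanza itself is
# illustrative):
#
#     [experimental]
#     copies.read-from = changeset-only   # or: compatibility, filelog-only
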
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm


def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

-    # A small cache to avoid doing the work twice for merges
-    #
-    # In the vast majority of cases, if we ask information for a revision
-    # about 1 parent, we'll later ask it for the other. So it make sense to
-    # keep the information around when reaching the first parent of a merge
-    # and dropping it after it was provided for the second parents.
-    #
-    # It exists cases were only one parent of the merge will be walked. It
-    # happens when the "destination" the copy tracing is descendant from a
-    # new root, not common with the "source". In that case, we will only walk
-    # through merge parents that are descendant of changesets common
-    # between "source" and "destination".
-    #
-    # With the current case implementation if such changesets have a copy
-    # information, we'll keep them in memory until the end of
-    # _changesetforwardcopies. We don't expect the case to be frequent
-    # enough to matters.
-    #
-    # In addition, it would be possible to reach pathological case, were
-    # many first parent are met before any second parent is reached. In
-    # that case the cache could grow. If this even become an issue one can
-    # safely introduce a maximum cache size. This would trade extra CPU/IO
-    # time to save memory.
-    merge_caches = {}
-
    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:

        def revinfo(rev):
            p1, p2 = parents(rev)
-            value = None
-            e = merge_caches.pop(rev, None)
-            if e is not None:
-                return e
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
-            value = (p1, p2, raw)
-            if p1 != nullrev and p2 != nullrev:
-                # XXX some case we over cache, IGNORE
-                merge_caches[rev] = value
-            return value
+            return (p1, p2, raw)

    else:

        def revinfo(rev):
            p1, p2 = parents(rev)
-            value = None
-            e = merge_caches.pop(rev, None)
-            if e is not None:
-                return e
-            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
-            value = (p1, p2, changes)
-            if p1 != nullrev and p2 != nullrev:
-                # XXX some case we over cache, IGNORE
-                merge_caches[rev] = value
-            return value
+            else:
+                changes = None
+            return (p1, p2, changes)

    return revinfo


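# A small usage sketch of the getter above (assumed revision numbers): the
# returned closure is all that copy tracing needs per changeset.
#
#     revinfo = _revinfo_getter(repo, match)
#     p1, p2, changes = revinfo(some_rev)
#     # `changes` is None when the revision lacks the HASCOPIESINFO flag
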
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor


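# A minimal sketch of the memoization above (hypothetical revisions):
#
#     is_ancestor = cached_is_ancestor(repo.changelog.isancestorrev)
#     is_ancestor(2, 7)   # computed once, then stored in `cache`
#     is_ancestor(2, 7)   # served from `cache`
#     is_ancestor(7, 2)   # False immediately: ancestors have smaller revs
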
def _changesetforwardcopies(a, b, match):
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track a rename from "A" to "B", we need to gather all parent → child
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (i.e.¹:
    # `::b - ::a`) and also all the "root points", i.e. the parents of the
    # exclusive set that belong to ::a. These are exactly all the revisions
    # needed to express the parent → child edges we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, i.e.:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without
    # this special case.

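    # An equivalent revset formulation (an illustrative aside only; the
    # code below does not materialize these revsets):
    #
    #     revs  = repo.revs(b'only(%d, %d)', b.rev(), a.rev())
    #     roots = repo.revs(b'parents(only(%d, %d)) and ::%d',
    #                       b.rev(), a.rev(), a.rev())
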
    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parent data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only
        # contains revisions that will be relevant for an edge of the graph.
        # So if a child has a parent not in `children_count`, that edge
        # should not be processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs, children_count, b.rev(), revinfo, match, isancestor
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children
        # relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )


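# A small worked example of the `children_count` bookkeeping above
# (hypothetical graph, not part of the module): with edges 0 <- 1 <- 2,
# 0 <- 3 and a merge 4 = (2, 3), roots == {0}, and walking
# revs == [1, 2, 3, 4] yields
#
#     children_count == {0: 2, 1: 1, 2: 1, 3: 1, 4: 0}
#
# so _combine_changeset_copies() can pop a parent's copy data as soon as
# its counter of still-unvisited children drops to zero.
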
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revisions to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    all_copies = {}
    # iterate over all the "children" side of copy tracing "edge"
    for current_rev in revs:
        p1, p2, changes = revinfo(current_rev)
        current_copies = None

        # iterate over all parents to chain the existing data with the
        # data from the parent → child edge.
        for parent, parent_rev in ((1, p1), (2, p2)):
            if parent_rev == nullrev:
                continue
            remaining_children = children_count.get(parent_rev)
            if remaining_children is None:
                continue
            remaining_children -= 1
            children_count[parent_rev] = remaining_children
            if remaining_children:
                copies = all_copies.get(parent_rev, None)
            else:
                copies = all_copies.pop(parent_rev, None)

            if copies is None:
                # this is a root
                copies = {}

            newcopies = copies
            # chain the data in the edge with the existing data
            if changes is not None:
                childcopies = {}
                if parent == 1:
                    childcopies = changes.copied_from_p1
                elif parent == 2:
                    childcopies = changes.copied_from_p2

                if not alwaysmatch:
                    childcopies = {
                        dst: src
                        for dst, src in childcopies.items()
                        if match(dst)
                    }
                if childcopies:
                    newcopies = copies.copy()
                    for dest, source in pycompat.iteritems(childcopies):
                        prev = copies.get(source)
                        if prev is not None and prev[1] is not None:
                            source = prev[1]
                        newcopies[dest] = (current_rev, source)
                    assert newcopies is not copies
                if changes.removed:
                    if newcopies is copies:
                        newcopies = copies.copy()
                    for f in changes.removed:
                        if f in newcopies:
                            if newcopies is copies:
                                # copy on write to avoid affecting potential other
                                # branches. when there are no other branches, this
                                # could be avoided.
                                newcopies = copies.copy()
                            newcopies[f] = (current_rev, None)

            # check potential need to combine the data from another parent (for
            # that child). See comment below for details.
            if current_copies is None:
                current_copies = newcopies
            elif current_copies is newcopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                assert parent == 2
                current_copies = _merge_copies_dict(
                    newcopies, current_copies, isancestor, changes
                )
        all_copies[current_rev] = current_copies

    # filter out internal details and return a {dest: source} mapping
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


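# Shape of the intermediate data built above (an informal sketch): each
# per-revision mapping records, for every destination path, the revision
# that introduced the information and the traced source, with None once
# the file was removed:
#
#     all_copies[rev] == {
#         b'renamed-to': (introducing_rev, b'renamed-from'),
#         b'deleted-file': (removing_rev, None),
#     }
#
# Only entries whose source is not None survive into `final_copies`.
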
def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies mappings together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `ismerged(path)`: callable returning True if `path` has been merged in
      the current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
            elif isancestor(other_tt, new_tt):
                minor[dest] = value
    return minor


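# A condensed restatement of the conflict rules above (informal, in the
# same order the code checks them), for a destination present in both
# mappings with differing sources:
#
#     same revision on both sides           -> major wins
#     major deleted it, but it is salvaged  -> minor kept
#     minor deleted it, but it is salvaged  -> major wins
#     destination merged in this revision   -> major wins
#     major's rev not ancestor of minor's   -> major wins, unless a deletion
#     minor's rev ancestor of major's       -> major wins
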
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data items given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo


def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


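# A typical call of the dispatcher above (hypothetical revisions and
# paths): the result is always a {destination: source} mapping between
# the two contexts.
#
#     copies = pathcopies(repo[b'1.0'], repo[b'tip'])
#     # e.g. {b'new/name.py': b'old/name.py'} for a renamed file
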
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    | o        ---> 3 commit that modifies a.txt
    | /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


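# The dispatch above is driven by configuration along these lines (keys
# quoted from mergecopies() and _isfullcopytraceable(); the values shown
# are illustrative):
#
#     [experimental]
#     copytrace = heuristics              # or: off
#     copytrace.sourcecommitlimit = 100   # cap for full tracing above
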
def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public
    branches; if yes, we use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base such that if the number of
    changesets is more than the limit, the full copytracing algorithm won't
    be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src


class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )


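# A minimal sketch of how this container is filled and read (hypothetical
# paths; mirrors what the tracing functions below return per side):
#
#     bc = branch_copies(copy={b'b.txt': b'a.txt'})
#     bc.renamedelete[b'gone.txt'] = [b'kept-as.txt']
#     # merge code then consults bc.copy / bc.movewithdir / bc.dirmove
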
def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm, which finds all the new files that were
    added from the merge base up to the top commit and, for each such file,
    checks whether it was copied from another file.

    This is pretty slow when a lot of changesets are involved, but it will
    track all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)
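    # For example (hypothetical paths), copies1 = {b'b': b'a', b'c': b'a'}
    # inverts to inversecopies1 = {b'a': [b'b', b'c']}, grouping every
    # destination of a given source so divergence can be detected per source.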

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    cache = []

    def _get_addedfiles(idx):
        if not cache:
            addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
            addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
            u1 = sorted(addedinm1 - addedinm2)
            u2 = sorted(addedinm2 - addedinm1)
            cache.extend((u1, u2))
        return cache[idx]

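    # Both lambdas below share the mutable `cache` list, so the (potentially
    # expensive) filesnotin() computation runs at most once, and only if a
    # caller actually asks for the added-files sets.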
    u1fn = lambda: _get_addedfiles(0)
    u2fn = lambda: _get_addedfiles(1)
    if repo.ui.debugflag:
        u1 = u1fn()
        u2 = u2fn()

        header = b"  unmatched files in %s"
        if u1:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1))
            )
        if u2:
            repo.ui.debug(
                b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2))
            )

        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"  on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


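# Minimal sketch of the per-source classification above, reduced to plain
# dicts and sets for illustration. The paths and the src_in_m1/src_in_m2
# flags (standing in for real manifest membership tests) are hypothetical.
def _example_classify_copies():
    inversecopies1 = {b'a': [b'b', b'c']}  # side 1: a -> b, a -> c
    inversecopies2 = {b'a': [b'c', b'd']}  # side 2: a -> c, a -> d
    src_in_m1 = src_in_m2 = False          # 'a' was deleted on both sides
    copy1, copy2, diverge = {}, {}, {}
    for src in set(inversecopies1) | set(inversecopies2):
        dsts1 = set(inversecopies1.get(src, ()))
        dsts2 = set(inversecopies2.get(src, ()))
        if dsts1 and dsts2 and not src_in_m1 and not src_in_m2:
            if dsts1 & dsts2:
                # overlapping destination (b'c') -> treated as a plain copy
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            else:
                diverge[src] = sorted(dsts1 | dsts2)
    return copy1, copy2, diverge  # here: ({b'c': b'a'}, {b'c': b'a'}, {})

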
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
    ctx)
    """
    # generate a directory move map
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
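    # Trailing slashes make the startswith() prefix checks below unambiguous:
    # e.g. {b'd1': b'd2'} becomes {b'd1/': b'd2/'}, so a move of b'd1/' no
    # longer matches an unrelated sibling like b'd1x/f' (hypothetical paths).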

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir


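# Minimal sketch of the directory-move detection above on plain data, for
# illustration. `remaining_dirs` stands in for ctx.hasdir() (directories that
# still exist on this side); all paths are hypothetical.
def _example_dir_renames():
    fullcopy = {b'd2/a': b'd1/a', b'd2/b': b'd1/b'}  # dst -> src
    remaining_dirs = {b'd2'}                         # d1 was entirely moved
    invalid, dirmove = set(), {}
    for dst, src in fullcopy.items():
        dsrc = src.rsplit(b'/', 1)[0]
        ddst = dst.rsplit(b'/', 1)[0]
        if dsrc in invalid:
            continue
        elif dsrc in remaining_dirs and ddst in remaining_dirs:
            invalid.add(dsrc)  # source dir was not entirely moved
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            invalid.add(dsrc)  # moved to two different places
        else:
            dirmove[dsrc] = ddst
    dirmove = {k: v for k, v in dirmove.items() if k not in invalid}
    return dirmove  # here: {b'd1': b'd2'}

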
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the set of commits from base up to c2, not
    including base.

    If a merge is involved, it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by the heuristics can be
    very numerous, which makes the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit`, which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # a file is missing if it isn't present in the destination, but is present
    # in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)
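        # e.g. a (hypothetical) file b'd2/x.c' added on the c1 side is indexed
        # under both its basename b'x.c' and its dirname b'd2', the two keys
        # the heuristics below use to find move candidates.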

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates, which can slow down the
            # heuristics; a config value limits the number of candidate
            # moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}


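# Minimal sketch of generating move candidates for one missing file from the
# two indexes above, for illustration; all paths are hypothetical.
def _example_movecandidates():
    added = [b'd2/x.c', b'd2/y.c', b'other/x.c']  # files new on side 1
    missing = b'd1/x.c'                           # deleted on side 1
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in added:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    # here: [b'd2/x.c', b'other/x.c'] -- the files sharing the basename;
    # nothing was added in b'd1', so the dirname index contributes nothing
    return candidates

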
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


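# Minimal sketch of the two-pointer walk in _related() on plain revision
# numbers, for illustration. Each list stands in for a filelog ancestor
# chain's linkrevs, newest first; the walk advances whichever side is newer.
def _example_related_walk():
    revs1 = iter([7, 5, 2])  # hypothetical linkrevs of f1's ancestors
    revs2 = iter([6, 4, 2])  # hypothetical linkrevs of f2's ancestors
    try:
        r1, r2 = next(revs1), next(revs2)
        while True:
            if r1 > r2:
                r1 = next(revs1)
            elif r2 > r1:
                r2 = next(revs2)
            else:
                return True  # both chains reach rev 2: related
    except StopIteration:
        return False  # one chain ran out first: unrelated

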
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
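
# Usage note (illustrative, following the docstring above): a graft of `ctx`
# onto the working copy would typically call graftcopies(wctx, ctx, base)
# with base == ctx.p1() after merge.update() has run, so that any renames
# merge.update() missed still get recorded via markcopied().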