##// END OF EJS Templates
copies: inline _backwardrenames() in pathcopies()...
Martin von Zweigbergk -
r47395:324ded1a default
parent child Browse files
Show More
@@ -1,1300 +1,1303 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
72 def _chain(prefix, suffix):
72 def _chain(prefix, suffix):
73 """chain two sets of copies 'prefix' and 'suffix'"""
73 """chain two sets of copies 'prefix' and 'suffix'"""
74 result = prefix.copy()
74 result = prefix.copy()
75 for key, value in pycompat.iteritems(suffix):
75 for key, value in pycompat.iteritems(suffix):
76 result[key] = prefix.get(value, value)
76 result[key] = prefix.get(value, value)
77 return result
77 return result
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
115 def usechangesetcentricalgo(repo):
115 def usechangesetcentricalgo(repo):
116 """Checks if we should use changeset-centric copy algorithms"""
116 """Checks if we should use changeset-centric copy algorithms"""
117 if repo.filecopiesmode == b'changeset-sidedata':
117 if repo.filecopiesmode == b'changeset-sidedata':
118 return True
118 return True
119 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
119 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 changesetsource = (b'changeset-only', b'compatibility')
120 changesetsource = (b'changeset-only', b'compatibility')
121 return readfrom in changesetsource
121 return readfrom in changesetsource
122
122
123
123
124 def _committedforwardcopies(a, b, base, match):
124 def _committedforwardcopies(a, b, base, match):
125 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
125 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 # files might have to be traced back to the fctx parent of the last
126 # files might have to be traced back to the fctx parent of the last
127 # one-side-only changeset, but not further back than that
127 # one-side-only changeset, but not further back than that
128 repo = a._repo
128 repo = a._repo
129
129
130 if usechangesetcentricalgo(repo):
130 if usechangesetcentricalgo(repo):
131 return _changesetforwardcopies(a, b, match)
131 return _changesetforwardcopies(a, b, match)
132
132
133 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
133 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 dbg = repo.ui.debug
134 dbg = repo.ui.debug
135 if debug:
135 if debug:
136 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
136 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 am = a.manifest()
137 am = a.manifest()
138 basemf = None if base is None else base.manifest()
138 basemf = None if base is None else base.manifest()
139
139
140 # find where new files came from
140 # find where new files came from
141 # we currently don't try to find where old files went, too expensive
141 # we currently don't try to find where old files went, too expensive
142 # this means we can miss a case like 'hg rm b; hg cp a b'
142 # this means we can miss a case like 'hg rm b; hg cp a b'
143 cm = {}
143 cm = {}
144
144
145 # Computing the forward missing is quite expensive on large manifests, since
145 # Computing the forward missing is quite expensive on large manifests, since
146 # it compares the entire manifests. We can optimize it in the common use
146 # it compares the entire manifests. We can optimize it in the common use
147 # case of computing what copies are in a commit versus its parent (like
147 # case of computing what copies are in a commit versus its parent (like
148 # during a rebase or histedit). Note, we exclude merge commits from this
148 # during a rebase or histedit). Note, we exclude merge commits from this
149 # optimization, since the ctx.files() for a merge commit is not correct for
149 # optimization, since the ctx.files() for a merge commit is not correct for
150 # this comparison.
150 # this comparison.
151 forwardmissingmatch = match
151 forwardmissingmatch = match
152 if b.p1() == a and b.p2().node() == nullid:
152 if b.p1() == a and b.p2().node() == nullid:
153 filesmatcher = matchmod.exact(b.files())
153 filesmatcher = matchmod.exact(b.files())
154 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
154 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
155 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
156 missing = list(b.walk(match))
156 missing = list(b.walk(match))
157 # _computeforwardmissing(a, b, match=forwardmissingmatch)
157 # _computeforwardmissing(a, b, match=forwardmissingmatch)
158 if debug:
158 if debug:
159 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
159 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
160 else:
160 else:
161 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
161 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
162 if debug:
162 if debug:
163 dbg(
163 dbg(
164 b'debug.copies: missing files to search: %d\n'
164 b'debug.copies: missing files to search: %d\n'
165 % len(missing)
165 % len(missing)
166 )
166 )
167
167
168 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
168 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
169
169
170 for f in sorted(missing):
170 for f in sorted(missing):
171 if debug:
171 if debug:
172 dbg(b'debug.copies: tracing file: %s\n' % f)
172 dbg(b'debug.copies: tracing file: %s\n' % f)
173 fctx = b[f]
173 fctx = b[f]
174 fctx._ancestrycontext = ancestrycontext
174 fctx._ancestrycontext = ancestrycontext
175
175
176 if debug:
176 if debug:
177 start = util.timer()
177 start = util.timer()
178 opath = _tracefile(fctx, am, basemf)
178 opath = _tracefile(fctx, am, basemf)
179 if opath:
179 if opath:
180 if debug:
180 if debug:
181 dbg(b'debug.copies: rename of: %s\n' % opath)
181 dbg(b'debug.copies: rename of: %s\n' % opath)
182 cm[f] = opath
182 cm[f] = opath
183 if debug:
183 if debug:
184 dbg(
184 dbg(
185 b'debug.copies: time: %f seconds\n'
185 b'debug.copies: time: %f seconds\n'
186 % (util.timer() - start)
186 % (util.timer() - start)
187 )
187 )
188 return cm
188 return cm
189
189
190
190
191 def _revinfo_getter(repo, match):
191 def _revinfo_getter(repo, match):
192 """returns a function that returns the following data given a <rev>
192 """returns a function that returns the following data given a <rev>
193
193
194 * p1: revision number of first parent
194 * p1: revision number of first parent
195 * p2: revision number of second parent
195 * p2: revision number of second parent
196 * changes: a ChangingFiles object
196 * changes: a ChangingFiles object
197 """
197 """
198 cl = repo.changelog
198 cl = repo.changelog
199 parents = cl.parentrevs
199 parents = cl.parentrevs
200 flags = cl.flags
200 flags = cl.flags
201
201
202 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
202 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
203
203
204 changelogrevision = cl.changelogrevision
204 changelogrevision = cl.changelogrevision
205
205
206 if rustmod is not None:
206 if rustmod is not None:
207
207
208 def revinfo(rev):
208 def revinfo(rev):
209 p1, p2 = parents(rev)
209 p1, p2 = parents(rev)
210 if flags(rev) & HASCOPIESINFO:
210 if flags(rev) & HASCOPIESINFO:
211 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
211 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
212 else:
212 else:
213 raw = None
213 raw = None
214 return (p1, p2, raw)
214 return (p1, p2, raw)
215
215
216 else:
216 else:
217
217
218 def revinfo(rev):
218 def revinfo(rev):
219 p1, p2 = parents(rev)
219 p1, p2 = parents(rev)
220 if flags(rev) & HASCOPIESINFO:
220 if flags(rev) & HASCOPIESINFO:
221 changes = changelogrevision(rev).changes
221 changes = changelogrevision(rev).changes
222 else:
222 else:
223 changes = None
223 changes = None
224 return (p1, p2, changes)
224 return (p1, p2, changes)
225
225
226 return revinfo
226 return revinfo
227
227
228
228
229 def cached_is_ancestor(is_ancestor):
229 def cached_is_ancestor(is_ancestor):
230 """return a cached version of is_ancestor"""
230 """return a cached version of is_ancestor"""
231 cache = {}
231 cache = {}
232
232
233 def _is_ancestor(anc, desc):
233 def _is_ancestor(anc, desc):
234 if anc > desc:
234 if anc > desc:
235 return False
235 return False
236 elif anc == desc:
236 elif anc == desc:
237 return True
237 return True
238 key = (anc, desc)
238 key = (anc, desc)
239 ret = cache.get(key)
239 ret = cache.get(key)
240 if ret is None:
240 if ret is None:
241 ret = cache[key] = is_ancestor(anc, desc)
241 ret = cache[key] = is_ancestor(anc, desc)
242 return ret
242 return ret
243
243
244 return _is_ancestor
244 return _is_ancestor
245
245
246
246
247 def _changesetforwardcopies(a, b, match):
247 def _changesetforwardcopies(a, b, match):
248 if a.rev() in (nullrev, b.rev()):
248 if a.rev() in (nullrev, b.rev()):
249 return {}
249 return {}
250
250
251 repo = a.repo().unfiltered()
251 repo = a.repo().unfiltered()
252 children = {}
252 children = {}
253
253
254 cl = repo.changelog
254 cl = repo.changelog
255 isancestor = cl.isancestorrev
255 isancestor = cl.isancestorrev
256
256
257 # To track a rename from "A" to B, we need to gather all parent → children
257 # To track a rename from "A" to B, we need to gather all parent → children
258 # edges that are contained in `::B` but not in `::A`.
258 # edges that are contained in `::B` but not in `::A`.
259 #
259 #
260 #
260 #
261 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
261 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
262 # ::a`) and also all the "root points", ie the parents of the exclusive set
262 # ::a`) and also all the "root points", ie the parents of the exclusive set
263 # that belong to ::a. These are exactly all the revisions needed to express
263 # that belong to ::a. These are exactly all the revisions needed to express
264 # the parent → children edges we need to combine.
264 # the parent → children edges we need to combine.
265 #
265 #
266 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
266 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
267 # excluding paths that lead to roots that are not ancestors of `a`. We
267 # excluding paths that lead to roots that are not ancestors of `a`. We
268 # keep this out of the explanation because it is hard enough without this special case.
268 # keep this out of the explanation because it is hard enough without this special case.
269
269
270 parents = cl._uncheckedparentrevs
270 parents = cl._uncheckedparentrevs
271 graph_roots = (nullrev, nullrev)
271 graph_roots = (nullrev, nullrev)
272
272
273 ancestors = cl.ancestors([a.rev()], inclusive=True)
273 ancestors = cl.ancestors([a.rev()], inclusive=True)
274 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
274 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
275 roots = set()
275 roots = set()
276 has_graph_roots = False
276 has_graph_roots = False
277 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
277 multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')
278
278
279 # iterate over `only(B, A)`
279 # iterate over `only(B, A)`
280 for r in revs:
280 for r in revs:
281 ps = parents(r)
281 ps = parents(r)
282 if ps == graph_roots:
282 if ps == graph_roots:
283 has_graph_roots = True
283 has_graph_roots = True
284 else:
284 else:
285 p1, p2 = ps
285 p1, p2 = ps
286
286
287 # find all the "root points" (see larger comment above)
287 # find all the "root points" (see larger comment above)
288 if p1 != nullrev and p1 in ancestors:
288 if p1 != nullrev and p1 in ancestors:
289 roots.add(p1)
289 roots.add(p1)
290 if p2 != nullrev and p2 in ancestors:
290 if p2 != nullrev and p2 in ancestors:
291 roots.add(p2)
291 roots.add(p2)
292 if not roots:
292 if not roots:
293 # no common revision to track copies from
293 # no common revision to track copies from
294 return {}
294 return {}
295 if has_graph_roots:
295 if has_graph_roots:
296 # this deals with the special case mentioned in the [1] footnote. We
296 # this deals with the special case mentioned in the [1] footnote. We
297 # must filter out revisions that lead to non-common graphroots.
297 # must filter out revisions that lead to non-common graphroots.
298 roots = list(roots)
298 roots = list(roots)
299 m = min(roots)
299 m = min(roots)
300 h = [b.rev()]
300 h = [b.rev()]
301 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
301 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
302 roots_to_head = set(roots_to_head)
302 roots_to_head = set(roots_to_head)
303 revs = [r for r in revs if r in roots_to_head]
303 revs = [r for r in revs if r in roots_to_head]
304
304
305 if repo.filecopiesmode == b'changeset-sidedata':
305 if repo.filecopiesmode == b'changeset-sidedata':
306 # When using side-data, we will process the edges "from" the children.
306 # When using side-data, we will process the edges "from" the children.
307 # We iterate over the children, gathering previously collected data for
307 # We iterate over the children, gathering previously collected data for
308 # the parents. To know when the parents' data is no longer necessary, we
308 # the parents. To know when the parents' data is no longer necessary, we
309 # keep a counter of how many children each revision has.
309 # keep a counter of how many children each revision has.
310 #
310 #
311 # An interesting property of `children_count` is that it only contains
311 # An interesting property of `children_count` is that it only contains
312 # revisions that will be relevant for an edge of the graph. So if a
312 # revisions that will be relevant for an edge of the graph. So if a
313 # child has a parent not in `children_count`, that edge should not be
313 # child has a parent not in `children_count`, that edge should not be
314 # processed.
314 # processed.
315 children_count = dict((r, 0) for r in roots)
315 children_count = dict((r, 0) for r in roots)
316 for r in revs:
316 for r in revs:
317 for p in cl.parentrevs(r):
317 for p in cl.parentrevs(r):
318 if p == nullrev:
318 if p == nullrev:
319 continue
319 continue
320 children_count[r] = 0
320 children_count[r] = 0
321 if p in children_count:
321 if p in children_count:
322 children_count[p] += 1
322 children_count[p] += 1
323 revinfo = _revinfo_getter(repo, match)
323 revinfo = _revinfo_getter(repo, match)
324 return _combine_changeset_copies(
324 return _combine_changeset_copies(
325 revs,
325 revs,
326 children_count,
326 children_count,
327 b.rev(),
327 b.rev(),
328 revinfo,
328 revinfo,
329 match,
329 match,
330 isancestor,
330 isancestor,
331 multi_thread,
331 multi_thread,
332 )
332 )
333 else:
333 else:
334 # When not using side-data, we will process the edges "from" the parent.
334 # When not using side-data, we will process the edges "from" the parent.
335 # so we need a full mapping of the parent -> children relation.
335 # so we need a full mapping of the parent -> children relation.
336 children = dict((r, []) for r in roots)
336 children = dict((r, []) for r in roots)
337 for r in revs:
337 for r in revs:
338 for p in cl.parentrevs(r):
338 for p in cl.parentrevs(r):
339 if p == nullrev:
339 if p == nullrev:
340 continue
340 continue
341 children[r] = []
341 children[r] = []
342 if p in children:
342 if p in children:
343 children[p].append(r)
343 children[p].append(r)
344 x = revs.pop()
344 x = revs.pop()
345 assert x == b.rev()
345 assert x == b.rev()
346 revs.extend(roots)
346 revs.extend(roots)
347 revs.sort()
347 revs.sort()
348
348
349 revinfo = _revinfo_getter_extra(repo)
349 revinfo = _revinfo_getter_extra(repo)
350 return _combine_changeset_copies_extra(
350 return _combine_changeset_copies_extra(
351 revs, children, b.rev(), revinfo, match, isancestor
351 revs, children, b.rev(), revinfo, match, isancestor
352 )
352 )
353
353
354
354
355 def _combine_changeset_copies(
355 def _combine_changeset_copies(
356 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
356 revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
357 ):
357 ):
358 """combine the copies information for each item of iterrevs
358 """combine the copies information for each item of iterrevs
359
359
360 revs: sorted iterable of revision to visit
360 revs: sorted iterable of revision to visit
361 children_count: a {parent: <number-of-relevant-children>} mapping.
361 children_count: a {parent: <number-of-relevant-children>} mapping.
362 targetrev: the final copies destination revision (not in iterrevs)
362 targetrev: the final copies destination revision (not in iterrevs)
363 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
363 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
364 match: a matcher
364 match: a matcher
365
365
366 It returns the aggregated copies information for `targetrev`.
366 It returns the aggregated copies information for `targetrev`.
367 """
367 """
368
368
369 alwaysmatch = match.always()
369 alwaysmatch = match.always()
370
370
371 if rustmod is not None:
371 if rustmod is not None:
372 final_copies = rustmod.combine_changeset_copies(
372 final_copies = rustmod.combine_changeset_copies(
373 list(revs), children_count, targetrev, revinfo, multi_thread
373 list(revs), children_count, targetrev, revinfo, multi_thread
374 )
374 )
375 else:
375 else:
376 isancestor = cached_is_ancestor(isancestor)
376 isancestor = cached_is_ancestor(isancestor)
377
377
378 all_copies = {}
378 all_copies = {}
379 # iterate over all the "children" side of copy tracing "edge"
379 # iterate over all the "children" side of copy tracing "edge"
380 for current_rev in revs:
380 for current_rev in revs:
381 p1, p2, changes = revinfo(current_rev)
381 p1, p2, changes = revinfo(current_rev)
382 current_copies = None
382 current_copies = None
383 # iterate over all parents to chain the existing data with the
383 # iterate over all parents to chain the existing data with the
384 # data from the parent → child edge.
384 # data from the parent → child edge.
385 for parent, parent_rev in ((1, p1), (2, p2)):
385 for parent, parent_rev in ((1, p1), (2, p2)):
386 if parent_rev == nullrev:
386 if parent_rev == nullrev:
387 continue
387 continue
388 remaining_children = children_count.get(parent_rev)
388 remaining_children = children_count.get(parent_rev)
389 if remaining_children is None:
389 if remaining_children is None:
390 continue
390 continue
391 remaining_children -= 1
391 remaining_children -= 1
392 children_count[parent_rev] = remaining_children
392 children_count[parent_rev] = remaining_children
393 if remaining_children:
393 if remaining_children:
394 copies = all_copies.get(parent_rev, None)
394 copies = all_copies.get(parent_rev, None)
395 else:
395 else:
396 copies = all_copies.pop(parent_rev, None)
396 copies = all_copies.pop(parent_rev, None)
397
397
398 if copies is None:
398 if copies is None:
399 # this is a root
399 # this is a root
400 newcopies = copies = {}
400 newcopies = copies = {}
401 elif remaining_children:
401 elif remaining_children:
402 newcopies = copies.copy()
402 newcopies = copies.copy()
403 else:
403 else:
404 newcopies = copies
404 newcopies = copies
405 # chain the data in the edge with the existing data
405 # chain the data in the edge with the existing data
406 if changes is not None:
406 if changes is not None:
407 childcopies = {}
407 childcopies = {}
408 if parent == 1:
408 if parent == 1:
409 childcopies = changes.copied_from_p1
409 childcopies = changes.copied_from_p1
410 elif parent == 2:
410 elif parent == 2:
411 childcopies = changes.copied_from_p2
411 childcopies = changes.copied_from_p2
412
412
413 if childcopies:
413 if childcopies:
414 newcopies = copies.copy()
414 newcopies = copies.copy()
415 for dest, source in pycompat.iteritems(childcopies):
415 for dest, source in pycompat.iteritems(childcopies):
416 prev = copies.get(source)
416 prev = copies.get(source)
417 if prev is not None and prev[1] is not None:
417 if prev is not None and prev[1] is not None:
418 source = prev[1]
418 source = prev[1]
419 newcopies[dest] = (current_rev, source)
419 newcopies[dest] = (current_rev, source)
420 assert newcopies is not copies
420 assert newcopies is not copies
421 if changes.removed:
421 if changes.removed:
422 for f in changes.removed:
422 for f in changes.removed:
423 if f in newcopies:
423 if f in newcopies:
424 if newcopies is copies:
424 if newcopies is copies:
425 # copy on write to avoid affecting potential other
425 # copy on write to avoid affecting potential other
426 # branches. when there are no other branches, this
426 # branches. when there are no other branches, this
427 # could be avoided.
427 # could be avoided.
428 newcopies = copies.copy()
428 newcopies = copies.copy()
429 newcopies[f] = (current_rev, None)
429 newcopies[f] = (current_rev, None)
430 # check potential need to combine the data from another parent (for
430 # check potential need to combine the data from another parent (for
431 # that child). See comment below for details.
431 # that child). See comment below for details.
432 if current_copies is None:
432 if current_copies is None:
433 current_copies = newcopies
433 current_copies = newcopies
434 else:
434 else:
435 # we are the second parent to work on c, we need to merge our
435 # we are the second parent to work on c, we need to merge our
436 # work with the other.
436 # work with the other.
437 #
437 #
438 # In case of conflict, parent 1 take precedence over parent 2.
438 # In case of conflict, parent 1 take precedence over parent 2.
439 # This is an arbitrary choice made anew when implementing
439 # This is an arbitrary choice made anew when implementing
440 # changeset based copies. It was made without regards with
440 # changeset based copies. It was made without regards with
441 # potential filelog related behavior.
441 # potential filelog related behavior.
442 assert parent == 2
442 assert parent == 2
443 current_copies = _merge_copies_dict(
443 current_copies = _merge_copies_dict(
444 newcopies,
444 newcopies,
445 current_copies,
445 current_copies,
446 isancestor,
446 isancestor,
447 changes,
447 changes,
448 current_rev,
448 current_rev,
449 )
449 )
450 all_copies[current_rev] = current_copies
450 all_copies[current_rev] = current_copies
451
451
452 # filter out internal details and return a {dest: source mapping}
452 # filter out internal details and return a {dest: source mapping}
453 final_copies = {}
453 final_copies = {}
454 for dest, (tt, source) in all_copies[targetrev].items():
454 for dest, (tt, source) in all_copies[targetrev].items():
455 if source is not None:
455 if source is not None:
456 final_copies[dest] = source
456 final_copies[dest] = source
457 if not alwaysmatch:
457 if not alwaysmatch:
458 for filename in list(final_copies.keys()):
458 for filename in list(final_copies.keys()):
459 if not match(filename):
459 if not match(filename):
460 del final_copies[filename]
460 del final_copies[filename]
461 return final_copies
461 return final_copies
462
462
463
463
464 # constant to decide which side to pick with _merge_copies_dict
464 # constant to decide which side to pick with _merge_copies_dict
465 PICK_MINOR = 0
465 PICK_MINOR = 0
466 PICK_MAJOR = 1
466 PICK_MAJOR = 1
467 PICK_EITHER = 2
467 PICK_EITHER = 2
468
468
469
469
470 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
470 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
471 """merge two copies-mapping together, minor and major
471 """merge two copies-mapping together, minor and major
472
472
473 In case of conflict, value from "major" will be picked.
473 In case of conflict, value from "major" will be picked.
474
474
475 - `isancestors(low_rev, high_rev)`: callable returning True if `low_rev` is an
475 - `isancestors(low_rev, high_rev)`: callable returning True if `low_rev` is an
476 ancestor of `high_rev`,
476 ancestor of `high_rev`,
477
477
478 - `ismerged(path)`: callable returning True if `path` has been merged in the
478 - `ismerged(path)`: callable returning True if `path` has been merged in the
479 current revision,
479 current revision,
480
480
481 return the resulting dict (in practice, the "minor" object, updated)
481 return the resulting dict (in practice, the "minor" object, updated)
482 """
482 """
483 for dest, value in major.items():
483 for dest, value in major.items():
484 other = minor.get(dest)
484 other = minor.get(dest)
485 if other is None:
485 if other is None:
486 minor[dest] = value
486 minor[dest] = value
487 else:
487 else:
488 pick, overwrite = _compare_values(
488 pick, overwrite = _compare_values(
489 changes, isancestor, dest, other, value
489 changes, isancestor, dest, other, value
490 )
490 )
491 if overwrite:
491 if overwrite:
492 if pick == PICK_MAJOR:
492 if pick == PICK_MAJOR:
493 minor[dest] = (current_merge, value[1])
493 minor[dest] = (current_merge, value[1])
494 else:
494 else:
495 minor[dest] = (current_merge, other[1])
495 minor[dest] = (current_merge, other[1])
496 elif pick == PICK_MAJOR:
496 elif pick == PICK_MAJOR:
497 minor[dest] = value
497 minor[dest] = value
498 return minor
498 return minor
499
499
500
500
501 def _compare_values(changes, isancestor, dest, minor, major):
501 def _compare_values(changes, isancestor, dest, minor, major):
502 """compare two value within a _merge_copies_dict loop iteration
502 """compare two value within a _merge_copies_dict loop iteration
503
503
504 return (pick, overwrite).
504 return (pick, overwrite).
505
505
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
507 - overwrite is True if pick is the result of resolving an ambiguity.
507 - overwrite is True if pick is the result of resolving an ambiguity.
508 """
508 """
509 major_tt, major_value = major
509 major_tt, major_value = major
510 minor_tt, minor_value = minor
510 minor_tt, minor_value = minor
511
511
512 if major_tt == minor_tt:
512 if major_tt == minor_tt:
513 # if it comes from the same revision it must be the same value
513 # if it comes from the same revision it must be the same value
514 assert major_value == minor_value
514 assert major_value == minor_value
515 return PICK_EITHER, False
515 return PICK_EITHER, False
516 elif (
516 elif (
517 changes is not None
517 changes is not None
518 and minor_value is not None
518 and minor_value is not None
519 and major_value is None
519 and major_value is None
520 and dest in changes.salvaged
520 and dest in changes.salvaged
521 ):
521 ):
522 # In this case, a deletion was reverted, the "alive" value overwrite
522 # In this case, a deletion was reverted, the "alive" value overwrite
523 # the deleted one.
523 # the deleted one.
524 return PICK_MINOR, True
524 return PICK_MINOR, True
525 elif (
525 elif (
526 changes is not None
526 changes is not None
527 and major_value is not None
527 and major_value is not None
528 and minor_value is None
528 and minor_value is None
529 and dest in changes.salvaged
529 and dest in changes.salvaged
530 ):
530 ):
531 # In this case, a deletion was reverted, the "alive" value overwrite
531 # In this case, a deletion was reverted, the "alive" value overwrite
532 # the deleted one.
532 # the deleted one.
533 return PICK_MAJOR, True
533 return PICK_MAJOR, True
534 elif isancestor(minor_tt, major_tt):
534 elif isancestor(minor_tt, major_tt):
535 if changes is not None and dest in changes.merged:
535 if changes is not None and dest in changes.merged:
536 # change to dest happened on the branch without copy-source change,
536 # change to dest happened on the branch without copy-source change,
537 # so both source are valid and "major" wins.
537 # so both source are valid and "major" wins.
538 return PICK_MAJOR, True
538 return PICK_MAJOR, True
539 else:
539 else:
540 return PICK_MAJOR, False
540 return PICK_MAJOR, False
541 elif isancestor(major_tt, minor_tt):
541 elif isancestor(major_tt, minor_tt):
542 if changes is not None and dest in changes.merged:
542 if changes is not None and dest in changes.merged:
543 # change to dest happened on the branch without copy-source change,
543 # change to dest happened on the branch without copy-source change,
544 # so both source are valid and "major" wins.
544 # so both source are valid and "major" wins.
545 return PICK_MAJOR, True
545 return PICK_MAJOR, True
546 else:
546 else:
547 return PICK_MINOR, False
547 return PICK_MINOR, False
548 elif minor_value is None:
548 elif minor_value is None:
549 # in case of conflict, the "alive" side wins.
549 # in case of conflict, the "alive" side wins.
550 return PICK_MAJOR, True
550 return PICK_MAJOR, True
551 elif major_value is None:
551 elif major_value is None:
552 # in case of conflict, the "alive" side wins.
552 # in case of conflict, the "alive" side wins.
553 return PICK_MINOR, True
553 return PICK_MINOR, True
554 else:
554 else:
555 # in case of conflict where both side are alive, major wins.
555 # in case of conflict where both side are alive, major wins.
556 return PICK_MAJOR, True
556 return PICK_MAJOR, True
557
557
558
558
def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>

    The returned tuple contains:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            # count the non-null filelog parents; two or more means the file
            # was merged in this revision
            nb_parents = 0
            for n in parents:
                if n != nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
596
596
597
597
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this revision is a root: start from an empty copy mapping
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        # chain through the previously recorded rename
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    # drop entries whose source is gone (deleted files recorded as None)
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
666
666
667
667
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
669 """version of `_merge_copies_dict` that works with the Google
669 """version of `_merge_copies_dict` that works with the Google
670 specific "extra" based storage for copy information"""
670 specific "extra" based storage for copy information"""
671 for dest, value in major.items():
671 for dest, value in major.items():
672 other = minor.get(dest)
672 other = minor.get(dest)
673 if other is None:
673 if other is None:
674 minor[dest] = value
674 minor[dest] = value
675 else:
675 else:
676 new_tt = value[0]
676 new_tt = value[0]
677 other_tt = other[0]
677 other_tt = other[0]
678 if value[1] == other[1]:
678 if value[1] == other[1]:
679 continue
679 continue
680 # content from "major" wins, unless it is older
680 # content from "major" wins, unless it is older
681 # than the branch point or there is a merge
681 # than the branch point or there is a merge
682 if (
682 if (
683 new_tt == other_tt
683 new_tt == other_tt
684 or not isancestor(new_tt, other_tt)
684 or not isancestor(new_tt, other_tt)
685 or ismerged(dest)
685 or ismerged(dest)
686 ):
686 ):
687 minor[dest] = value
687 minor[dest] = value
688
688
689
689
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # b is the working copy: compute copies up to its first parent,
        # then layer the dirstate copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
704
704
705
705
def _backwardrenames(a, b, match):
    """find renames from a to b"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    return _reverse_renames(forward, a, match)
716
716
717
717
def _reverse_renames(copies, dst, match):
    """given copies to context 'dst', finds renames from that context"""
    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    reversed_map = {}
    for to_name, from_name in sorted(pycompat.iteritems(copies)):
        if match and not match(from_name):
            continue
        if from_name in dst:
            # the source still exists in 'dst': this is a copy, not a rename
            continue
        reversed_map[from_name] = to_name
    return reversed_map
732
732
733
733
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != nullrev:
            base = x
        # walk back from x to the common ancestor, then forward to y
        x_copies = _forwardcopies(a, x)
        y_copies = _forwardcopies(a, y, base, match=match)
        x_backward_renames = _reverse_renames(x_copies, x, match)
        copies = _chain(
            x_backward_renames,
            y_copies,
        )
    _filter(x, y, copies)
    return copies
770
773
771
774
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
840
843
841
844
842 def _isfullcopytraceable(repo, c1, base):
845 def _isfullcopytraceable(repo, c1, base):
843 """Checks that if base, source and destination are all no-public branches,
846 """Checks that if base, source and destination are all no-public branches,
844 if yes let's use the full copytrace algorithm for increased capabilities
847 if yes let's use the full copytrace algorithm for increased capabilities
845 since it will be fast enough.
848 since it will be fast enough.
846
849
847 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
850 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
848 number of changesets from c1 to base such that if number of changesets are
851 number of changesets from c1 to base such that if number of changesets are
849 more than the limit, full copytracing algorithm won't be used.
852 more than the limit, full copytracing algorithm won't be used.
850 """
853 """
851 if c1.rev() is None:
854 if c1.rev() is None:
852 c1 = c1.p1()
855 c1 = c1.p1()
853 if c1.mutable() and base.mutable():
856 if c1.mutable() and base.mutable():
854 sourcecommitlimit = repo.ui.configint(
857 sourcecommitlimit = repo.ui.configint(
855 b'experimental', b'copytrace.sourcecommitlimit'
858 b'experimental', b'copytrace.sourcecommitlimit'
856 )
859 )
857 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
860 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
858 return commits < sourcecommitlimit
861 return commits < sourcecommitlimit
859 return False
862 return False
860
863
861
864
862 def _checksinglesidecopies(
865 def _checksinglesidecopies(
863 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
866 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
864 ):
867 ):
865 if src not in m2:
868 if src not in m2:
866 # deleted on side 2
869 # deleted on side 2
867 if src not in m1:
870 if src not in m1:
868 # renamed on side 1, deleted on side 2
871 # renamed on side 1, deleted on side 2
869 renamedelete[src] = dsts1
872 renamedelete[src] = dsts1
870 elif src not in mb:
873 elif src not in mb:
871 # Work around the "short-circuit to avoid issues with merge states"
874 # Work around the "short-circuit to avoid issues with merge states"
872 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
875 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
873 # destination doesn't exist in y.
876 # destination doesn't exist in y.
874 pass
877 pass
875 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
878 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
876 return
879 return
877 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
880 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
878 # modified on side 2
881 # modified on side 2
879 for dst in dsts1:
882 for dst in dsts1:
880 copy[dst] = src
883 copy[dst] = src
881
884
882
885
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # each attribute defaults to a fresh empty dict (never share a
        # mutable default between instances)
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )
917
920
918
921
919 def _fullcopytracing(repo, c1, c2, base):
922 def _fullcopytracing(repo, c1, c2, base):
920 """The full copytracing algorithm which finds all the new files that were
923 """The full copytracing algorithm which finds all the new files that were
921 added from merge base up to the top commit and for each file it checks if
924 added from merge base up to the top commit and for each file it checks if
922 this file was copied from another file.
925 this file was copied from another file.
923
926
924 This is pretty slow when a lot of changesets are involved but will track all
927 This is pretty slow when a lot of changesets are involved but will track all
925 the copies.
928 the copies.
926 """
929 """
927 m1 = c1.manifest()
930 m1 = c1.manifest()
928 m2 = c2.manifest()
931 m2 = c2.manifest()
929 mb = base.manifest()
932 mb = base.manifest()
930
933
931 copies1 = pathcopies(base, c1)
934 copies1 = pathcopies(base, c1)
932 copies2 = pathcopies(base, c2)
935 copies2 = pathcopies(base, c2)
933
936
934 if not (copies1 or copies2):
937 if not (copies1 or copies2):
935 return branch_copies(), branch_copies(), {}
938 return branch_copies(), branch_copies(), {}
936
939
937 inversecopies1 = {}
940 inversecopies1 = {}
938 inversecopies2 = {}
941 inversecopies2 = {}
939 for dst, src in copies1.items():
942 for dst, src in copies1.items():
940 inversecopies1.setdefault(src, []).append(dst)
943 inversecopies1.setdefault(src, []).append(dst)
941 for dst, src in copies2.items():
944 for dst, src in copies2.items():
942 inversecopies2.setdefault(src, []).append(dst)
945 inversecopies2.setdefault(src, []).append(dst)
943
946
944 copy1 = {}
947 copy1 = {}
945 copy2 = {}
948 copy2 = {}
946 diverge = {}
949 diverge = {}
947 renamedelete1 = {}
950 renamedelete1 = {}
948 renamedelete2 = {}
951 renamedelete2 = {}
949 allsources = set(inversecopies1) | set(inversecopies2)
952 allsources = set(inversecopies1) | set(inversecopies2)
950 for src in allsources:
953 for src in allsources:
951 dsts1 = inversecopies1.get(src)
954 dsts1 = inversecopies1.get(src)
952 dsts2 = inversecopies2.get(src)
955 dsts2 = inversecopies2.get(src)
953 if dsts1 and dsts2:
956 if dsts1 and dsts2:
954 # copied/renamed on both sides
957 # copied/renamed on both sides
955 if src not in m1 and src not in m2:
958 if src not in m1 and src not in m2:
956 # renamed on both sides
959 # renamed on both sides
957 dsts1 = set(dsts1)
960 dsts1 = set(dsts1)
958 dsts2 = set(dsts2)
961 dsts2 = set(dsts2)
959 # If there's some overlap in the rename destinations, we
962 # If there's some overlap in the rename destinations, we
960 # consider it not divergent. For example, if side 1 copies 'a'
963 # consider it not divergent. For example, if side 1 copies 'a'
961 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
964 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
962 # and 'd' and deletes 'a'.
965 # and 'd' and deletes 'a'.
963 if dsts1 & dsts2:
966 if dsts1 & dsts2:
964 for dst in dsts1 & dsts2:
967 for dst in dsts1 & dsts2:
965 copy1[dst] = src
968 copy1[dst] = src
966 copy2[dst] = src
969 copy2[dst] = src
967 else:
970 else:
968 diverge[src] = sorted(dsts1 | dsts2)
971 diverge[src] = sorted(dsts1 | dsts2)
969 elif src in m1 and src in m2:
972 elif src in m1 and src in m2:
970 # copied on both sides
973 # copied on both sides
971 dsts1 = set(dsts1)
974 dsts1 = set(dsts1)
972 dsts2 = set(dsts2)
975 dsts2 = set(dsts2)
973 for dst in dsts1 & dsts2:
976 for dst in dsts1 & dsts2:
974 copy1[dst] = src
977 copy1[dst] = src
975 copy2[dst] = src
978 copy2[dst] = src
976 # TODO: Handle cases where it was renamed on one side and copied
979 # TODO: Handle cases where it was renamed on one side and copied
977 # on the other side
980 # on the other side
978 elif dsts1:
981 elif dsts1:
979 # copied/renamed only on side 1
982 # copied/renamed only on side 1
980 _checksinglesidecopies(
983 _checksinglesidecopies(
981 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
984 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
982 )
985 )
983 elif dsts2:
986 elif dsts2:
984 # copied/renamed only on side 2
987 # copied/renamed only on side 2
985 _checksinglesidecopies(
988 _checksinglesidecopies(
986 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
989 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
987 )
990 )
988
991
989 # find interesting file sets from manifests
992 # find interesting file sets from manifests
990 cache = []
993 cache = []
991
994
992 def _get_addedfiles(idx):
995 def _get_addedfiles(idx):
993 if not cache:
996 if not cache:
994 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
997 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
995 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
998 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
996 u1 = sorted(addedinm1 - addedinm2)
999 u1 = sorted(addedinm1 - addedinm2)
997 u2 = sorted(addedinm2 - addedinm1)
1000 u2 = sorted(addedinm2 - addedinm1)
998 cache.extend((u1, u2))
1001 cache.extend((u1, u2))
999 return cache[idx]
1002 return cache[idx]
1000
1003
1001 u1fn = lambda: _get_addedfiles(0)
1004 u1fn = lambda: _get_addedfiles(0)
1002 u2fn = lambda: _get_addedfiles(1)
1005 u2fn = lambda: _get_addedfiles(1)
1003 if repo.ui.debugflag:
1006 if repo.ui.debugflag:
1004 u1 = u1fn()
1007 u1 = u1fn()
1005 u2 = u2fn()
1008 u2 = u2fn()
1006
1009
1007 header = b" unmatched files in %s"
1010 header = b" unmatched files in %s"
1008 if u1:
1011 if u1:
1009 repo.ui.debug(
1012 repo.ui.debug(
1010 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1013 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1011 )
1014 )
1012 if u2:
1015 if u2:
1013 repo.ui.debug(
1016 repo.ui.debug(
1014 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1017 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1015 )
1018 )
1016
1019
1017 renamedeleteset = set()
1020 renamedeleteset = set()
1018 divergeset = set()
1021 divergeset = set()
1019 for dsts in diverge.values():
1022 for dsts in diverge.values():
1020 divergeset.update(dsts)
1023 divergeset.update(dsts)
1021 for dsts in renamedelete1.values():
1024 for dsts in renamedelete1.values():
1022 renamedeleteset.update(dsts)
1025 renamedeleteset.update(dsts)
1023 for dsts in renamedelete2.values():
1026 for dsts in renamedelete2.values():
1024 renamedeleteset.update(dsts)
1027 renamedeleteset.update(dsts)
1025
1028
1026 repo.ui.debug(
1029 repo.ui.debug(
1027 b" all copies found (* = to merge, ! = divergent, "
1030 b" all copies found (* = to merge, ! = divergent, "
1028 b"% = renamed and deleted):\n"
1031 b"% = renamed and deleted):\n"
1029 )
1032 )
1030 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1033 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1031 if not copies:
1034 if not copies:
1032 continue
1035 continue
1033 repo.ui.debug(b" on %s side:\n" % side)
1036 repo.ui.debug(b" on %s side:\n" % side)
1034 for f in sorted(copies):
1037 for f in sorted(copies):
1035 note = b""
1038 note = b""
1036 if f in copy1 or f in copy2:
1039 if f in copy1 or f in copy2:
1037 note += b"*"
1040 note += b"*"
1038 if f in divergeset:
1041 if f in divergeset:
1039 note += b"!"
1042 note += b"!"
1040 if f in renamedeleteset:
1043 if f in renamedeleteset:
1041 note += b"%"
1044 note += b"%"
1042 repo.ui.debug(
1045 repo.ui.debug(
1043 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1046 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1044 )
1047 )
1045 del renamedeleteset
1048 del renamedeleteset
1046 del divergeset
1049 del divergeset
1047
1050
1048 repo.ui.debug(b" checking for directory renames\n")
1051 repo.ui.debug(b" checking for directory renames\n")
1049
1052
1050 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1053 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1051 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1054 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1052
1055
1053 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1056 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1054 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1057 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1055
1058
1056 return branch_copies1, branch_copies2, diverge
1059 return branch_copies1, branch_copies2, diverge
1057
1060
1058
1061
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: function returning added files on the other side (compared to
        ctx)
    """
    # Build a map of candidate directory renames; a source directory is
    # "poisoned" (added to uninteresting) as soon as we see evidence that it
    # was not moved wholesale.
    uninteresting = set()
    dirmove = {}

    # A directory move is suspected whenever every file of a directory was
    # copied/renamed into one single new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in uninteresting:
            # already seen to be uninteresting
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # directory wasn't entirely moved locally
            uninteresting.add(srcdir)
        elif srcdir in dirmove and dirmove[srcdir] != dstdir:
            # files from the same directory moved to two different places
            uninteresting.add(srcdir)
        else:
            # looks good so far
            dirmove[srcdir] = dstdir

    # Drop every candidate that was later disqualified.
    for poisoned in uninteresting:
        dirmove.pop(poisoned, None)
    del uninteresting

    if not dirmove:
        return {}, {}

    # Normalize to trailing-slash form so prefix matching below is exact.
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # Sort the directories in reverse order, so we find children first
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file)
    dirmove_children_first = sorted(dirmove, reverse=True)

    movewithdir = {}
    # Files added (on the other side) inside a moved directory, with no copy
    # record of their own, should follow the directory move.
    for added in addedfilesfn():
        if added in fullcopy:
            continue
        for d in dirmove_children_first:
            if added.startswith(d):
                # new file added in a directory that was moved, move it
                target = dirmove[d] + added[len(d) :]
                if target not in copy:
                    movewithdir[added] = target
                    repo.ui.debug(
                        b"   pending file src: '%s' -> dst: '%s'\n"
                        % (added, target)
                    )
                break

    return dirmove, movewithdir
1128
1131
1129
1132
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # Working-directory contexts have no revision number; use their parent.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    touched = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # Walk the linear history from c2 back to base, collecting touched files.
    cursor = c2
    while cursor != base:
        if len(cursor.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        touched.update(cursor.files())
        cursor = cursor.p1()

    # Keep only the copies whose source still exists on the c1 side.
    forward = _forwardcopies(base, c2)
    copies2 = {
        dst: src for dst, src in pycompat.iteritems(forward) if src in m1
    }

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missing = [f for f in touched if f not in m1 and f in base and f in c2]

    copies1 = {}
    if missing:
        # Index the files added on the c1 side by basename and by dirname;
        # a missing file's rename target likely shares one of the two.
        by_basename = collections.defaultdict(list)
        by_dirname = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            by_basename[os.path.basename(f)].append(f)
            by_dirname[os.path.dirname(f)].append(f)

        for f in missing:
            candidates = (
                by_basename[os.path.basename(f)]
                + by_dirname[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            limit = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > limit:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1237
1240
1238
1241
1239 def _related(f1, f2):
1242 def _related(f1, f2):
1240 """return True if f1 and f2 filectx have a common ancestor
1243 """return True if f1 and f2 filectx have a common ancestor
1241
1244
1242 Walk back to common ancestor to see if the two files originate
1245 Walk back to common ancestor to see if the two files originate
1243 from the same file. Since workingfilectx's rev() is None it messes
1246 from the same file. Since workingfilectx's rev() is None it messes
1244 up the integer comparison logic, hence the pre-step check for
1247 up the integer comparison logic, hence the pre-step check for
1245 None (f1 and f2 can only be workingfilectx's initially).
1248 None (f1 and f2 can only be workingfilectx's initially).
1246 """
1249 """
1247
1250
1248 if f1 == f2:
1251 if f1 == f2:
1249 return True # a match
1252 return True # a match
1250
1253
1251 g1, g2 = f1.ancestors(), f2.ancestors()
1254 g1, g2 = f1.ancestors(), f2.ancestors()
1252 try:
1255 try:
1253 f1r, f2r = f1.linkrev(), f2.linkrev()
1256 f1r, f2r = f1.linkrev(), f2.linkrev()
1254
1257
1255 if f1r is None:
1258 if f1r is None:
1256 f1 = next(g1)
1259 f1 = next(g1)
1257 if f2r is None:
1260 if f2r is None:
1258 f2 = next(g2)
1261 f2 = next(g2)
1259
1262
1260 while True:
1263 while True:
1261 f1r, f2r = f1.linkrev(), f2.linkrev()
1264 f1r, f2r = f1.linkrev(), f2.linkrev()
1262 if f1r > f2r:
1265 if f1r > f2r:
1263 f1 = next(g1)
1266 f1 = next(g1)
1264 elif f2r > f1r:
1267 elif f2r > f1r:
1265 f2 = next(g2)
1268 f2 = next(g2)
1266 else: # f1 and f2 point to files in the same linkrev
1269 else: # f1 and f2 point to files in the same linkrev
1267 return f1 == f2 # true if they point to the same file
1270 return f1 == f2 # true if they point to the same file
1268 except StopIteration:
1271 except StopIteration:
1269 return False
1272 return False
1270
1273
1271
1274
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    found = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, found)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    for preexisting in [dest for dest in found if dest in parent]:
        del found[preexisting]
    for dst, src in pycompat.iteritems(found):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now