##// END OF EJS Templates
dirstate-item: use `tracked` instead of `state` during copy detection...
marmoute -
r48909:d86875b7 default
parent child Browse files
Show More
@@ -1,1306 +1,1306 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import nullrev
15 from .node import nullrev
16
16
17 from . import (
17 from . import (
18 match as matchmod,
18 match as matchmod,
19 pathutil,
19 pathutil,
20 policy,
20 policy,
21 pycompat,
21 pycompat,
22 util,
22 util,
23 )
23 )
24
24
25
25
26 from .utils import stringutil
26 from .utils import stringutil
27
27
28 from .revlogutils import (
28 from .revlogutils import (
29 flagutil,
29 flagutil,
30 sidedata as sidedatamod,
30 sidedata as sidedatamod,
31 )
31 )
32
32
33 rustmod = policy.importrust("copy_tracing")
33 rustmod = policy.importrust("copy_tracing")
34
34
35
35
36 def _filter(src, dst, t):
36 def _filter(src, dst, t):
37 """filters out invalid copies after chaining"""
37 """filters out invalid copies after chaining"""
38
38
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
39 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
40 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
41 # in the following table (not including trivial cases). For example, case 6
41 # in the following table (not including trivial cases). For example, case 6
42 # is where a file existed in 'src' and remained under that name in 'mid' and
42 # is where a file existed in 'src' and remained under that name in 'mid' and
43 # then was renamed between 'mid' and 'dst'.
43 # then was renamed between 'mid' and 'dst'.
44 #
44 #
45 # case src mid dst result
45 # case src mid dst result
46 # 1 x y - -
46 # 1 x y - -
47 # 2 x y y x->y
47 # 2 x y y x->y
48 # 3 x y x -
48 # 3 x y x -
49 # 4 x y z x->z
49 # 4 x y z x->z
50 # 5 - x y -
50 # 5 - x y -
51 # 6 x x y x->y
51 # 6 x x y x->y
52 #
52 #
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
53 # _chain() takes care of chaining the copies in 'a' and 'b', but it
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
54 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
55 # between 5 and 6, so it includes all cases in its result.
55 # between 5 and 6, so it includes all cases in its result.
56 # Cases 1, 3, and 5 are then removed by _filter().
56 # Cases 1, 3, and 5 are then removed by _filter().
57
57
58 for k, v in list(t.items()):
58 for k, v in list(t.items()):
59 if k == v: # case 3
59 if k == v: # case 3
60 del t[k]
60 del t[k]
61 elif v not in src: # case 5
61 elif v not in src: # case 5
62 # remove copies from files that didn't exist
62 # remove copies from files that didn't exist
63 del t[k]
63 del t[k]
64 elif k not in dst: # case 1
64 elif k not in dst: # case 1
65 # remove copies to files that were then removed
65 # remove copies to files that were then removed
66 del t[k]
66 del t[k]
67
67
68
68
69 def _chain(prefix, suffix):
69 def _chain(prefix, suffix):
70 """chain two sets of copies 'prefix' and 'suffix'"""
70 """chain two sets of copies 'prefix' and 'suffix'"""
71 result = prefix.copy()
71 result = prefix.copy()
72 for key, value in pycompat.iteritems(suffix):
72 for key, value in pycompat.iteritems(suffix):
73 result[key] = prefix.get(value, value)
73 result[key] = prefix.get(value, value)
74 return result
74 return result
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if not ds.get_entry(k).tracked or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage always implies changeset-centric copy tracing
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
119
119
120
120
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # the changeset-centric algorithms bypass filelog tracing entirely
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    renames = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().rev() == nullrev:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel knob: trace every file b contains, not just the missing ones
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    # share one ancestry context across all traced files to amortize its cost
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            renames[f] = opath
        if debug:
            dbg(b'debug.copies: time: %f seconds\n' % (util.timer() - start))
    return renames
186
186
187
187
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>"

    * p1: revision number of first parent
    * p2: revision number of first parent
    * changes: a ChangingFiles object
    """
    # bind hot lookups to locals so the returned closure avoids repeated
    # attribute access
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust path consumes the raw sidedata blob directly

        def revinfo(rev):
            p1, p2 = parents(rev)
            raw = None
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            return (p1, p2, raw)

    else:
        # pure-python path: hand back the parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            changes = None
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            return (p1, p2, changes)

    return revinfo
224
224
225
225
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # revision numbers are topologically ordered, so a higher number can
        # never be an ancestor of a lower one; equal revisions are trivially
        # their own ancestor — neither case needs the expensive callback
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        try:
            return cache[key]
        except KeyError:
            result = cache[key] = is_ancestor(anc, desc)
            return result

    return _is_ancestor
242
242
243
243
def _changesetforwardcopies(a, b, match):
    # trivial cases: tracing from the null revision or from b itself
    if a.rev() in (nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contains in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that leads to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deal with the special case mentionned in the [1] footnotes. We
        # must filter out revisions that leads to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previous collected data for
        # the parents. To know when the parents data is no longer necessary,
        # we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = {r: 0 for r in roots}
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        with repo.changelog.reading():
            return _combine_changeset_copies(
                revs,
                children_count,
                b.rev(),
                revinfo,
                match,
                isancestor,
                multi_thread,
            )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent. so we need a full mapping of the parent -> children relation.
        children = {r: [] for r in roots}
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
351
351
352
352
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # delegate the whole combination to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # {rev: {dest: (introducing_rev, source-or-None)}} for pending revs
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # edge not relevant for copy tracing (see docstring of
                    # the caller); skip it
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    # other children still need the parent data: keep it
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child: the parent data can be consumed in place
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # copy-on-write so sibling branches are unaffected
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # chain through an earlier rename of 'source'
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting
                                    # potential other branches. when there are
                                    # no other branches, this could be avoided.
                                    newcopies = copies.copy()
                                # record the deletion with a None source
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent
                # (for that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge
                    # our work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over
                    # parent 2. This is an arbitrary choice made anew when
                    # implementing changeset based copies. It was made without
                    # regards with potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source mapping}
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
460
460
461
461
# constants to decide which side to pick with _merge_copies_dict
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
466
466
467
467
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
      ancestors of `high_rev`,

    - `ismerged(path)`: callable return True if `path` have been merged in the
      current revision,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no conflict: take the major entry as-is
            minor[dest] = major_value
            continue
        pick, overwrite = _compare_values(
            changes, isancestor, dest, minor_value, major_value
        )
        if overwrite:
            # an ambiguity was resolved here: re-anchor the entry on the
            # current merge revision, keeping the chosen side's source
            if pick == PICK_MAJOR:
                minor[dest] = (current_merge, major_value[1])
            else:
                minor[dest] = (current_merge, minor_value[1])
        elif pick == PICK_MAJOR:
            minor[dest] = major_value
    return minor
497
497
498
498
499 def _compare_values(changes, isancestor, dest, minor, major):
499 def _compare_values(changes, isancestor, dest, minor, major):
500 """compare two value within a _merge_copies_dict loop iteration
500 """compare two value within a _merge_copies_dict loop iteration
501
501
502 return (pick, overwrite).
502 return (pick, overwrite).
503
503
504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
504 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
505 - overwrite is True if pick is a return of an ambiguity that needs resolution.
505 - overwrite is True if pick is a return of an ambiguity that needs resolution.
506 """
506 """
507 major_tt, major_value = major
507 major_tt, major_value = major
508 minor_tt, minor_value = minor
508 minor_tt, minor_value = minor
509
509
510 if major_tt == minor_tt:
510 if major_tt == minor_tt:
511 # if it comes from the same revision it must be the same value
511 # if it comes from the same revision it must be the same value
512 assert major_value == minor_value
512 assert major_value == minor_value
513 return PICK_EITHER, False
513 return PICK_EITHER, False
514 elif (
514 elif (
515 changes is not None
515 changes is not None
516 and minor_value is not None
516 and minor_value is not None
517 and major_value is None
517 and major_value is None
518 and dest in changes.salvaged
518 and dest in changes.salvaged
519 ):
519 ):
520 # In this case, a deletion was reverted, the "alive" value overwrite
520 # In this case, a deletion was reverted, the "alive" value overwrite
521 # the deleted one.
521 # the deleted one.
522 return PICK_MINOR, True
522 return PICK_MINOR, True
523 elif (
523 elif (
524 changes is not None
524 changes is not None
525 and major_value is not None
525 and major_value is not None
526 and minor_value is None
526 and minor_value is None
527 and dest in changes.salvaged
527 and dest in changes.salvaged
528 ):
528 ):
529 # In this case, a deletion was reverted, the "alive" value overwrite
529 # In this case, a deletion was reverted, the "alive" value overwrite
530 # the deleted one.
530 # the deleted one.
531 return PICK_MAJOR, True
531 return PICK_MAJOR, True
532 elif isancestor(minor_tt, major_tt):
532 elif isancestor(minor_tt, major_tt):
533 if changes is not None and dest in changes.merged:
533 if changes is not None and dest in changes.merged:
534 # change to dest happened on the branch without copy-source change,
534 # change to dest happened on the branch without copy-source change,
535 # so both source are valid and "major" wins.
535 # so both source are valid and "major" wins.
536 return PICK_MAJOR, True
536 return PICK_MAJOR, True
537 else:
537 else:
538 return PICK_MAJOR, False
538 return PICK_MAJOR, False
539 elif isancestor(major_tt, minor_tt):
539 elif isancestor(major_tt, minor_tt):
540 if changes is not None and dest in changes.merged:
540 if changes is not None and dest in changes.merged:
541 # change to dest happened on the branch without copy-source change,
541 # change to dest happened on the branch without copy-source change,
542 # so both source are valid and "major" wins.
542 # so both source are valid and "major" wins.
543 return PICK_MAJOR, True
543 return PICK_MAJOR, True
544 else:
544 else:
545 return PICK_MINOR, False
545 return PICK_MINOR, False
546 elif minor_value is None:
546 elif minor_value is None:
547 # in case of conflict, the "alive" side wins.
547 # in case of conflict, the "alive" side wins.
548 return PICK_MAJOR, True
548 return PICK_MAJOR, True
549 elif major_value is None:
549 elif major_value is None:
550 # in case of conflict, the "alive" side wins.
550 # in case of conflict, the "alive" side wins.
551 return PICK_MINOR, True
551 return PICK_MINOR, True
552 else:
552 else:
553 # in case of conflict where both side are alive, major wins.
553 # in case of conflict where both side are alive, major wins.
554 return PICK_MAJOR, True
554 return PICK_MAJOR, True
555
555
556
556
557 def _revinfo_getter_extra(repo):
557 def _revinfo_getter_extra(repo):
558 """return a function that return multiple data given a <rev>"i
558 """return a function that return multiple data given a <rev>"i
559
559
560 * p1: revision number of first parent
560 * p1: revision number of first parent
561 * p2: revision number of first parent
561 * p2: revision number of first parent
562 * p1copies: mapping of copies from p1
562 * p1copies: mapping of copies from p1
563 * p2copies: mapping of copies from p2
563 * p2copies: mapping of copies from p2
564 * removed: a list of removed files
564 * removed: a list of removed files
565 * ismerged: a callback to know if file was merged in that revision
565 * ismerged: a callback to know if file was merged in that revision
566 """
566 """
567 cl = repo.changelog
567 cl = repo.changelog
568 parents = cl.parentrevs
568 parents = cl.parentrevs
569
569
570 def get_ismerged(rev):
570 def get_ismerged(rev):
571 ctx = repo[rev]
571 ctx = repo[rev]
572
572
573 def ismerged(path):
573 def ismerged(path):
574 if path not in ctx.files():
574 if path not in ctx.files():
575 return False
575 return False
576 fctx = ctx[path]
576 fctx = ctx[path]
577 parents = fctx._filelog.parents(fctx._filenode)
577 parents = fctx._filelog.parents(fctx._filenode)
578 nb_parents = 0
578 nb_parents = 0
579 for n in parents:
579 for n in parents:
580 if n != repo.nullid:
580 if n != repo.nullid:
581 nb_parents += 1
581 nb_parents += 1
582 return nb_parents >= 2
582 return nb_parents >= 2
583
583
584 return ismerged
584 return ismerged
585
585
586 def revinfo(rev):
586 def revinfo(rev):
587 p1, p2 = parents(rev)
587 p1, p2 = parents(rev)
588 ctx = repo[rev]
588 ctx = repo[rev]
589 p1copies, p2copies = ctx._copies
589 p1copies, p2copies = ctx._copies
590 removed = ctx.filesremoved()
590 removed = ctx.filesremoved()
591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
591 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
592
592
593 return revinfo
593 return revinfo
594
594
595
595
596 def _combine_changeset_copies_extra(
596 def _combine_changeset_copies_extra(
597 revs, children, targetrev, revinfo, match, isancestor
597 revs, children, targetrev, revinfo, match, isancestor
598 ):
598 ):
599 """version of `_combine_changeset_copies` that works with the Google
599 """version of `_combine_changeset_copies` that works with the Google
600 specific "extra" based storage for copy information"""
600 specific "extra" based storage for copy information"""
601 all_copies = {}
601 all_copies = {}
602 alwaysmatch = match.always()
602 alwaysmatch = match.always()
603 for r in revs:
603 for r in revs:
604 copies = all_copies.pop(r, None)
604 copies = all_copies.pop(r, None)
605 if copies is None:
605 if copies is None:
606 # this is a root
606 # this is a root
607 copies = {}
607 copies = {}
608 for i, c in enumerate(children[r]):
608 for i, c in enumerate(children[r]):
609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
609 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
610 if r == p1:
610 if r == p1:
611 parent = 1
611 parent = 1
612 childcopies = p1copies
612 childcopies = p1copies
613 else:
613 else:
614 assert r == p2
614 assert r == p2
615 parent = 2
615 parent = 2
616 childcopies = p2copies
616 childcopies = p2copies
617 if not alwaysmatch:
617 if not alwaysmatch:
618 childcopies = {
618 childcopies = {
619 dst: src for dst, src in childcopies.items() if match(dst)
619 dst: src for dst, src in childcopies.items() if match(dst)
620 }
620 }
621 newcopies = copies
621 newcopies = copies
622 if childcopies:
622 if childcopies:
623 newcopies = copies.copy()
623 newcopies = copies.copy()
624 for dest, source in pycompat.iteritems(childcopies):
624 for dest, source in pycompat.iteritems(childcopies):
625 prev = copies.get(source)
625 prev = copies.get(source)
626 if prev is not None and prev[1] is not None:
626 if prev is not None and prev[1] is not None:
627 source = prev[1]
627 source = prev[1]
628 newcopies[dest] = (c, source)
628 newcopies[dest] = (c, source)
629 assert newcopies is not copies
629 assert newcopies is not copies
630 for f in removed:
630 for f in removed:
631 if f in newcopies:
631 if f in newcopies:
632 if newcopies is copies:
632 if newcopies is copies:
633 # copy on write to avoid affecting potential other
633 # copy on write to avoid affecting potential other
634 # branches. when there are no other branches, this
634 # branches. when there are no other branches, this
635 # could be avoided.
635 # could be avoided.
636 newcopies = copies.copy()
636 newcopies = copies.copy()
637 newcopies[f] = (c, None)
637 newcopies[f] = (c, None)
638 othercopies = all_copies.get(c)
638 othercopies = all_copies.get(c)
639 if othercopies is None:
639 if othercopies is None:
640 all_copies[c] = newcopies
640 all_copies[c] = newcopies
641 else:
641 else:
642 # we are the second parent to work on c, we need to merge our
642 # we are the second parent to work on c, we need to merge our
643 # work with the other.
643 # work with the other.
644 #
644 #
645 # In case of conflict, parent 1 take precedence over parent 2.
645 # In case of conflict, parent 1 take precedence over parent 2.
646 # This is an arbitrary choice made anew when implementing
646 # This is an arbitrary choice made anew when implementing
647 # changeset based copies. It was made without regards with
647 # changeset based copies. It was made without regards with
648 # potential filelog related behavior.
648 # potential filelog related behavior.
649 if parent == 1:
649 if parent == 1:
650 _merge_copies_dict_extra(
650 _merge_copies_dict_extra(
651 othercopies, newcopies, isancestor, ismerged
651 othercopies, newcopies, isancestor, ismerged
652 )
652 )
653 else:
653 else:
654 _merge_copies_dict_extra(
654 _merge_copies_dict_extra(
655 newcopies, othercopies, isancestor, ismerged
655 newcopies, othercopies, isancestor, ismerged
656 )
656 )
657 all_copies[c] = newcopies
657 all_copies[c] = newcopies
658
658
659 final_copies = {}
659 final_copies = {}
660 for dest, (tt, source) in all_copies[targetrev].items():
660 for dest, (tt, source) in all_copies[targetrev].items():
661 if source is not None:
661 if source is not None:
662 final_copies[dest] = source
662 final_copies[dest] = source
663 return final_copies
663 return final_copies
664
664
665
665
666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
666 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
667 """version of `_merge_copies_dict` that works with the Google
667 """version of `_merge_copies_dict` that works with the Google
668 specific "extra" based storage for copy information"""
668 specific "extra" based storage for copy information"""
669 for dest, value in major.items():
669 for dest, value in major.items():
670 other = minor.get(dest)
670 other = minor.get(dest)
671 if other is None:
671 if other is None:
672 minor[dest] = value
672 minor[dest] = value
673 else:
673 else:
674 new_tt = value[0]
674 new_tt = value[0]
675 other_tt = other[0]
675 other_tt = other[0]
676 if value[1] == other[1]:
676 if value[1] == other[1]:
677 continue
677 continue
678 # content from "major" wins, unless it is older
678 # content from "major" wins, unless it is older
679 # than the branch point or there is a merge
679 # than the branch point or there is a merge
680 if (
680 if (
681 new_tt == other_tt
681 new_tt == other_tt
682 or not isancestor(new_tt, other_tt)
682 or not isancestor(new_tt, other_tt)
683 or ismerged(dest)
683 or ismerged(dest)
684 ):
684 ):
685 minor[dest] = value
685 minor[dest] = value
686
686
687
687
688 def _forwardcopies(a, b, base=None, match=None):
688 def _forwardcopies(a, b, base=None, match=None):
689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
689 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
690
690
691 if base is None:
691 if base is None:
692 base = a
692 base = a
693 match = a.repo().narrowmatch(match)
693 match = a.repo().narrowmatch(match)
694 # check for working copy
694 # check for working copy
695 if b.rev() is None:
695 if b.rev() is None:
696 cm = _committedforwardcopies(a, b.p1(), base, match)
696 cm = _committedforwardcopies(a, b.p1(), base, match)
697 # combine copies from dirstate if necessary
697 # combine copies from dirstate if necessary
698 copies = _chain(cm, _dirstatecopies(b._repo, match))
698 copies = _chain(cm, _dirstatecopies(b._repo, match))
699 else:
699 else:
700 copies = _committedforwardcopies(a, b, base, match)
700 copies = _committedforwardcopies(a, b, base, match)
701 return copies
701 return copies
702
702
703
703
704 def _backwardrenames(a, b, match):
704 def _backwardrenames(a, b, match):
705 """find renames from a to b"""
705 """find renames from a to b"""
706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
706 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
707 return {}
707 return {}
708
708
709 # We don't want to pass in "match" here, since that would filter
709 # We don't want to pass in "match" here, since that would filter
710 # the destination by it. Since we're reversing the copies, we want
710 # the destination by it. Since we're reversing the copies, we want
711 # to filter the source instead.
711 # to filter the source instead.
712 copies = _forwardcopies(b, a)
712 copies = _forwardcopies(b, a)
713 return _reverse_renames(copies, a, match)
713 return _reverse_renames(copies, a, match)
714
714
715
715
716 def _reverse_renames(copies, dst, match):
716 def _reverse_renames(copies, dst, match):
717 """given copies to context 'dst', finds renames from that context"""
717 """given copies to context 'dst', finds renames from that context"""
718 # Even though we're not taking copies into account, 1:n rename situations
718 # Even though we're not taking copies into account, 1:n rename situations
719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
719 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
720 # arbitrarily pick one of the renames.
720 # arbitrarily pick one of the renames.
721 r = {}
721 r = {}
722 for k, v in sorted(pycompat.iteritems(copies)):
722 for k, v in sorted(pycompat.iteritems(copies)):
723 if match and not match(v):
723 if match and not match(v):
724 continue
724 continue
725 # remove copies
725 # remove copies
726 if v in dst:
726 if v in dst:
727 continue
727 continue
728 r[v] = k
728 r[v] = k
729 return r
729 return r
730
730
731
731
732 def pathcopies(x, y, match=None):
732 def pathcopies(x, y, match=None):
733 """find {dst@y: src@x} copy mapping for directed compare"""
733 """find {dst@y: src@x} copy mapping for directed compare"""
734 repo = x._repo
734 repo = x._repo
735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
735 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
736 if debug:
736 if debug:
737 repo.ui.debug(
737 repo.ui.debug(
738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
738 b'debug.copies: searching copies from %s to %s\n' % (x, y)
739 )
739 )
740 if x == y or not x or not y:
740 if x == y or not x or not y:
741 return {}
741 return {}
742 if y.rev() is None and x == y.p1():
742 if y.rev() is None and x == y.p1():
743 if debug:
743 if debug:
744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
744 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
745 # short-circuit to avoid issues with merge states
745 # short-circuit to avoid issues with merge states
746 return _dirstatecopies(repo, match)
746 return _dirstatecopies(repo, match)
747 a = y.ancestor(x)
747 a = y.ancestor(x)
748 if a == x:
748 if a == x:
749 if debug:
749 if debug:
750 repo.ui.debug(b'debug.copies: search mode: forward\n')
750 repo.ui.debug(b'debug.copies: search mode: forward\n')
751 copies = _forwardcopies(x, y, match=match)
751 copies = _forwardcopies(x, y, match=match)
752 elif a == y:
752 elif a == y:
753 if debug:
753 if debug:
754 repo.ui.debug(b'debug.copies: search mode: backward\n')
754 repo.ui.debug(b'debug.copies: search mode: backward\n')
755 copies = _backwardrenames(x, y, match=match)
755 copies = _backwardrenames(x, y, match=match)
756 else:
756 else:
757 if debug:
757 if debug:
758 repo.ui.debug(b'debug.copies: search mode: combined\n')
758 repo.ui.debug(b'debug.copies: search mode: combined\n')
759 base = None
759 base = None
760 if a.rev() != nullrev:
760 if a.rev() != nullrev:
761 base = x
761 base = x
762 x_copies = _forwardcopies(a, x)
762 x_copies = _forwardcopies(a, x)
763 y_copies = _forwardcopies(a, y, base, match=match)
763 y_copies = _forwardcopies(a, y, base, match=match)
764 same_keys = set(x_copies) & set(y_copies)
764 same_keys = set(x_copies) & set(y_copies)
765 for k in same_keys:
765 for k in same_keys:
766 if x_copies.get(k) == y_copies.get(k):
766 if x_copies.get(k) == y_copies.get(k):
767 del x_copies[k]
767 del x_copies[k]
768 del y_copies[k]
768 del y_copies[k]
769 x_backward_renames = _reverse_renames(x_copies, x, match)
769 x_backward_renames = _reverse_renames(x_copies, x, match)
770 copies = _chain(
770 copies = _chain(
771 x_backward_renames,
771 x_backward_renames,
772 y_copies,
772 y_copies,
773 )
773 )
774 _filter(x, y, copies)
774 _filter(x, y, copies)
775 return copies
775 return copies
776
776
777
777
778 def mergecopies(repo, c1, c2, base):
778 def mergecopies(repo, c1, c2, base):
779 """
779 """
780 Finds moves and copies between context c1 and c2 that are relevant for
780 Finds moves and copies between context c1 and c2 that are relevant for
781 merging. 'base' will be used as the merge base.
781 merging. 'base' will be used as the merge base.
782
782
783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
783 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
784 files that were moved/ copied in one merge parent and modified in another.
784 files that were moved/ copied in one merge parent and modified in another.
785 For example:
785 For example:
786
786
787 o ---> 4 another commit
787 o ---> 4 another commit
788 |
788 |
789 | o ---> 3 commit that modifies a.txt
789 | o ---> 3 commit that modifies a.txt
790 | /
790 | /
791 o / ---> 2 commit that moves a.txt to b.txt
791 o / ---> 2 commit that moves a.txt to b.txt
792 |/
792 |/
793 o ---> 1 merge base
793 o ---> 1 merge base
794
794
795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
795 If we try to rebase revision 3 on revision 4, since there is no a.txt in
796 revision 4, and if user have copytrace disabled, we prints the following
796 revision 4, and if user have copytrace disabled, we prints the following
797 message:
797 message:
798
798
799 ```other changed <file> which local deleted```
799 ```other changed <file> which local deleted```
800
800
801 Returns a tuple where:
801 Returns a tuple where:
802
802
803 "branch_copies" an instance of branch_copies.
803 "branch_copies" an instance of branch_copies.
804
804
805 "diverge" is a mapping of source name -> list of destination names
805 "diverge" is a mapping of source name -> list of destination names
806 for divergent renames.
806 for divergent renames.
807
807
808 This function calls different copytracing algorithms based on config.
808 This function calls different copytracing algorithms based on config.
809 """
809 """
810 # avoid silly behavior for update from empty dir
810 # avoid silly behavior for update from empty dir
811 if not c1 or not c2 or c1 == c2:
811 if not c1 or not c2 or c1 == c2:
812 return branch_copies(), branch_copies(), {}
812 return branch_copies(), branch_copies(), {}
813
813
814 narrowmatch = c1.repo().narrowmatch()
814 narrowmatch = c1.repo().narrowmatch()
815
815
816 # avoid silly behavior for parent -> working dir
816 # avoid silly behavior for parent -> working dir
817 if c2.node() is None and c1.node() == repo.dirstate.p1():
817 if c2.node() is None and c1.node() == repo.dirstate.p1():
818 return (
818 return (
819 branch_copies(_dirstatecopies(repo, narrowmatch)),
819 branch_copies(_dirstatecopies(repo, narrowmatch)),
820 branch_copies(),
820 branch_copies(),
821 {},
821 {},
822 )
822 )
823
823
824 copytracing = repo.ui.config(b'experimental', b'copytrace')
824 copytracing = repo.ui.config(b'experimental', b'copytrace')
825 if stringutil.parsebool(copytracing) is False:
825 if stringutil.parsebool(copytracing) is False:
826 # stringutil.parsebool() returns None when it is unable to parse the
826 # stringutil.parsebool() returns None when it is unable to parse the
827 # value, so we should rely on making sure copytracing is on such cases
827 # value, so we should rely on making sure copytracing is on such cases
828 return branch_copies(), branch_copies(), {}
828 return branch_copies(), branch_copies(), {}
829
829
830 if usechangesetcentricalgo(repo):
830 if usechangesetcentricalgo(repo):
831 # The heuristics don't make sense when we need changeset-centric algos
831 # The heuristics don't make sense when we need changeset-centric algos
832 return _fullcopytracing(repo, c1, c2, base)
832 return _fullcopytracing(repo, c1, c2, base)
833
833
834 # Copy trace disabling is explicitly below the node == p1 logic above
834 # Copy trace disabling is explicitly below the node == p1 logic above
835 # because the logic above is required for a simple copy to be kept across a
835 # because the logic above is required for a simple copy to be kept across a
836 # rebase.
836 # rebase.
837 if copytracing == b'heuristics':
837 if copytracing == b'heuristics':
838 # Do full copytracing if only non-public revisions are involved as
838 # Do full copytracing if only non-public revisions are involved as
839 # that will be fast enough and will also cover the copies which could
839 # that will be fast enough and will also cover the copies which could
840 # be missed by heuristics
840 # be missed by heuristics
841 if _isfullcopytraceable(repo, c1, base):
841 if _isfullcopytraceable(repo, c1, base):
842 return _fullcopytracing(repo, c1, c2, base)
842 return _fullcopytracing(repo, c1, c2, base)
843 return _heuristicscopytracing(repo, c1, c2, base)
843 return _heuristicscopytracing(repo, c1, c2, base)
844 else:
844 else:
845 return _fullcopytracing(repo, c1, c2, base)
845 return _fullcopytracing(repo, c1, c2, base)
846
846
847
847
848 def _isfullcopytraceable(repo, c1, base):
848 def _isfullcopytraceable(repo, c1, base):
849 """Checks that if base, source and destination are all no-public branches,
849 """Checks that if base, source and destination are all no-public branches,
850 if yes let's use the full copytrace algorithm for increased capabilities
850 if yes let's use the full copytrace algorithm for increased capabilities
851 since it will be fast enough.
851 since it will be fast enough.
852
852
853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
853 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
854 number of changesets from c1 to base such that if number of changesets are
854 number of changesets from c1 to base such that if number of changesets are
855 more than the limit, full copytracing algorithm won't be used.
855 more than the limit, full copytracing algorithm won't be used.
856 """
856 """
857 if c1.rev() is None:
857 if c1.rev() is None:
858 c1 = c1.p1()
858 c1 = c1.p1()
859 if c1.mutable() and base.mutable():
859 if c1.mutable() and base.mutable():
860 sourcecommitlimit = repo.ui.configint(
860 sourcecommitlimit = repo.ui.configint(
861 b'experimental', b'copytrace.sourcecommitlimit'
861 b'experimental', b'copytrace.sourcecommitlimit'
862 )
862 )
863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
863 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
864 return commits < sourcecommitlimit
864 return commits < sourcecommitlimit
865 return False
865 return False
866
866
867
867
868 def _checksinglesidecopies(
868 def _checksinglesidecopies(
869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
869 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
870 ):
870 ):
871 if src not in m2:
871 if src not in m2:
872 # deleted on side 2
872 # deleted on side 2
873 if src not in m1:
873 if src not in m1:
874 # renamed on side 1, deleted on side 2
874 # renamed on side 1, deleted on side 2
875 renamedelete[src] = dsts1
875 renamedelete[src] = dsts1
876 elif src not in mb:
876 elif src not in mb:
877 # Work around the "short-circuit to avoid issues with merge states"
877 # Work around the "short-circuit to avoid issues with merge states"
878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
878 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
879 # destination doesn't exist in y.
879 # destination doesn't exist in y.
880 pass
880 pass
881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
881 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
882 return
882 return
883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
883 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
884 # modified on side 2
884 # modified on side 2
885 for dst in dsts1:
885 for dst in dsts1:
886 copy[dst] = src
886 copy[dst] = src
887
887
888
888
889 class branch_copies(object):
889 class branch_copies(object):
890 """Information about copies made on one side of a merge/graft.
890 """Information about copies made on one side of a merge/graft.
891
891
892 "copy" is a mapping from destination name -> source name,
892 "copy" is a mapping from destination name -> source name,
893 where source is in c1 and destination is in c2 or vice-versa.
893 where source is in c1 and destination is in c2 or vice-versa.
894
894
895 "movewithdir" is a mapping from source name -> destination name,
895 "movewithdir" is a mapping from source name -> destination name,
896 where the file at source present in one context but not the other
896 where the file at source present in one context but not the other
897 needs to be moved to destination by the merge process, because the
897 needs to be moved to destination by the merge process, because the
898 other context moved the directory it is in.
898 other context moved the directory it is in.
899
899
900 "renamedelete" is a mapping of source name -> list of destination
900 "renamedelete" is a mapping of source name -> list of destination
901 names for files deleted in c1 that were renamed in c2 or vice-versa.
901 names for files deleted in c1 that were renamed in c2 or vice-versa.
902
902
903 "dirmove" is a mapping of detected source dir -> destination dir renames.
903 "dirmove" is a mapping of detected source dir -> destination dir renames.
904 This is needed for handling changes to new files previously grafted into
904 This is needed for handling changes to new files previously grafted into
905 renamed directories.
905 renamed directories.
906 """
906 """
907
907
908 def __init__(
908 def __init__(
909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
909 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
910 ):
910 ):
911 self.copy = {} if copy is None else copy
911 self.copy = {} if copy is None else copy
912 self.renamedelete = {} if renamedelete is None else renamedelete
912 self.renamedelete = {} if renamedelete is None else renamedelete
913 self.dirmove = {} if dirmove is None else dirmove
913 self.dirmove = {} if dirmove is None else dirmove
914 self.movewithdir = {} if movewithdir is None else movewithdir
914 self.movewithdir = {} if movewithdir is None else movewithdir
915
915
916 def __repr__(self):
916 def __repr__(self):
917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
917 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
918 self.copy,
918 self.copy,
919 self.renamedelete,
919 self.renamedelete,
920 self.dirmove,
920 self.dirmove,
921 self.movewithdir,
921 self.movewithdir,
922 )
922 )
923
923
924
924
925 def _fullcopytracing(repo, c1, c2, base):
925 def _fullcopytracing(repo, c1, c2, base):
926 """The full copytracing algorithm which finds all the new files that were
926 """The full copytracing algorithm which finds all the new files that were
927 added from merge base up to the top commit and for each file it checks if
927 added from merge base up to the top commit and for each file it checks if
928 this file was copied from another file.
928 this file was copied from another file.
929
929
930 This is pretty slow when a lot of changesets are involved but will track all
930 This is pretty slow when a lot of changesets are involved but will track all
931 the copies.
931 the copies.
932 """
932 """
933 m1 = c1.manifest()
933 m1 = c1.manifest()
934 m2 = c2.manifest()
934 m2 = c2.manifest()
935 mb = base.manifest()
935 mb = base.manifest()
936
936
937 copies1 = pathcopies(base, c1)
937 copies1 = pathcopies(base, c1)
938 copies2 = pathcopies(base, c2)
938 copies2 = pathcopies(base, c2)
939
939
940 if not (copies1 or copies2):
940 if not (copies1 or copies2):
941 return branch_copies(), branch_copies(), {}
941 return branch_copies(), branch_copies(), {}
942
942
943 inversecopies1 = {}
943 inversecopies1 = {}
944 inversecopies2 = {}
944 inversecopies2 = {}
945 for dst, src in copies1.items():
945 for dst, src in copies1.items():
946 inversecopies1.setdefault(src, []).append(dst)
946 inversecopies1.setdefault(src, []).append(dst)
947 for dst, src in copies2.items():
947 for dst, src in copies2.items():
948 inversecopies2.setdefault(src, []).append(dst)
948 inversecopies2.setdefault(src, []).append(dst)
949
949
950 copy1 = {}
950 copy1 = {}
951 copy2 = {}
951 copy2 = {}
952 diverge = {}
952 diverge = {}
953 renamedelete1 = {}
953 renamedelete1 = {}
954 renamedelete2 = {}
954 renamedelete2 = {}
955 allsources = set(inversecopies1) | set(inversecopies2)
955 allsources = set(inversecopies1) | set(inversecopies2)
956 for src in allsources:
956 for src in allsources:
957 dsts1 = inversecopies1.get(src)
957 dsts1 = inversecopies1.get(src)
958 dsts2 = inversecopies2.get(src)
958 dsts2 = inversecopies2.get(src)
959 if dsts1 and dsts2:
959 if dsts1 and dsts2:
960 # copied/renamed on both sides
960 # copied/renamed on both sides
961 if src not in m1 and src not in m2:
961 if src not in m1 and src not in m2:
962 # renamed on both sides
962 # renamed on both sides
963 dsts1 = set(dsts1)
963 dsts1 = set(dsts1)
964 dsts2 = set(dsts2)
964 dsts2 = set(dsts2)
965 # If there's some overlap in the rename destinations, we
965 # If there's some overlap in the rename destinations, we
966 # consider it not divergent. For example, if side 1 copies 'a'
966 # consider it not divergent. For example, if side 1 copies 'a'
967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
967 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
968 # and 'd' and deletes 'a'.
968 # and 'd' and deletes 'a'.
969 if dsts1 & dsts2:
969 if dsts1 & dsts2:
970 for dst in dsts1 & dsts2:
970 for dst in dsts1 & dsts2:
971 copy1[dst] = src
971 copy1[dst] = src
972 copy2[dst] = src
972 copy2[dst] = src
973 else:
973 else:
974 diverge[src] = sorted(dsts1 | dsts2)
974 diverge[src] = sorted(dsts1 | dsts2)
975 elif src in m1 and src in m2:
975 elif src in m1 and src in m2:
976 # copied on both sides
976 # copied on both sides
977 dsts1 = set(dsts1)
977 dsts1 = set(dsts1)
978 dsts2 = set(dsts2)
978 dsts2 = set(dsts2)
979 for dst in dsts1 & dsts2:
979 for dst in dsts1 & dsts2:
980 copy1[dst] = src
980 copy1[dst] = src
981 copy2[dst] = src
981 copy2[dst] = src
982 # TODO: Handle cases where it was renamed on one side and copied
982 # TODO: Handle cases where it was renamed on one side and copied
983 # on the other side
983 # on the other side
984 elif dsts1:
984 elif dsts1:
985 # copied/renamed only on side 1
985 # copied/renamed only on side 1
986 _checksinglesidecopies(
986 _checksinglesidecopies(
987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
987 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
988 )
988 )
989 elif dsts2:
989 elif dsts2:
990 # copied/renamed only on side 2
990 # copied/renamed only on side 2
991 _checksinglesidecopies(
991 _checksinglesidecopies(
992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
992 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
993 )
993 )
994
994
995 # find interesting file sets from manifests
995 # find interesting file sets from manifests
996 cache = []
996 cache = []
997
997
998 def _get_addedfiles(idx):
998 def _get_addedfiles(idx):
999 if not cache:
999 if not cache:
1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1000 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1001 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
1002 u1 = sorted(addedinm1 - addedinm2)
1002 u1 = sorted(addedinm1 - addedinm2)
1003 u2 = sorted(addedinm2 - addedinm1)
1003 u2 = sorted(addedinm2 - addedinm1)
1004 cache.extend((u1, u2))
1004 cache.extend((u1, u2))
1005 return cache[idx]
1005 return cache[idx]
1006
1006
1007 u1fn = lambda: _get_addedfiles(0)
1007 u1fn = lambda: _get_addedfiles(0)
1008 u2fn = lambda: _get_addedfiles(1)
1008 u2fn = lambda: _get_addedfiles(1)
1009 if repo.ui.debugflag:
1009 if repo.ui.debugflag:
1010 u1 = u1fn()
1010 u1 = u1fn()
1011 u2 = u2fn()
1011 u2 = u2fn()
1012
1012
1013 header = b" unmatched files in %s"
1013 header = b" unmatched files in %s"
1014 if u1:
1014 if u1:
1015 repo.ui.debug(
1015 repo.ui.debug(
1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1016 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1017 )
1017 )
1018 if u2:
1018 if u2:
1019 repo.ui.debug(
1019 repo.ui.debug(
1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1020 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1021 )
1021 )
1022
1022
1023 renamedeleteset = set()
1023 renamedeleteset = set()
1024 divergeset = set()
1024 divergeset = set()
1025 for dsts in diverge.values():
1025 for dsts in diverge.values():
1026 divergeset.update(dsts)
1026 divergeset.update(dsts)
1027 for dsts in renamedelete1.values():
1027 for dsts in renamedelete1.values():
1028 renamedeleteset.update(dsts)
1028 renamedeleteset.update(dsts)
1029 for dsts in renamedelete2.values():
1029 for dsts in renamedelete2.values():
1030 renamedeleteset.update(dsts)
1030 renamedeleteset.update(dsts)
1031
1031
1032 repo.ui.debug(
1032 repo.ui.debug(
1033 b" all copies found (* = to merge, ! = divergent, "
1033 b" all copies found (* = to merge, ! = divergent, "
1034 b"% = renamed and deleted):\n"
1034 b"% = renamed and deleted):\n"
1035 )
1035 )
1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1036 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1037 if not copies:
1037 if not copies:
1038 continue
1038 continue
1039 repo.ui.debug(b" on %s side:\n" % side)
1039 repo.ui.debug(b" on %s side:\n" % side)
1040 for f in sorted(copies):
1040 for f in sorted(copies):
1041 note = b""
1041 note = b""
1042 if f in copy1 or f in copy2:
1042 if f in copy1 or f in copy2:
1043 note += b"*"
1043 note += b"*"
1044 if f in divergeset:
1044 if f in divergeset:
1045 note += b"!"
1045 note += b"!"
1046 if f in renamedeleteset:
1046 if f in renamedeleteset:
1047 note += b"%"
1047 note += b"%"
1048 repo.ui.debug(
1048 repo.ui.debug(
1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1049 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1050 )
1050 )
1051 del renamedeleteset
1051 del renamedeleteset
1052 del divergeset
1052 del divergeset
1053
1053
1054 repo.ui.debug(b" checking for directory renames\n")
1054 repo.ui.debug(b" checking for directory renames\n")
1055
1055
1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1056 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1057 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1058
1058
1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1059 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1060 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1061
1061
1062 return branch_copies1, branch_copies2, diverge
1062 return branch_copies1, branch_copies2, diverge
1063
1063
1064
1064
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory moves and the files that should follow them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: zero-argument callable returning added files on the other
        side (compared to ctx)

    Returns a pair ``(dirmove, movewithdir)``: a map of moved directory
    prefixes (with trailing ``/``) to their destinations, and a map of newly
    added files to the destination implied by their directory's move.
    """
    # Candidate directory renames; a source directory is ruled out as soon
    # as we see evidence it was not moved wholesale.
    uninteresting = set()
    moved = {}

    # Each copied file hints at a potential directory move, which is when
    # all the files in a directory are moved to a new directory.
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in uninteresting:
            # already seen to be uninteresting
            continue
        if (ctx.hasdir(srcdir) and ctx.hasdir(dstdir)) or (
            srcdir in moved and moved[srcdir] != dstdir
        ):
            # either the directory wasn't entirely moved locally, or files
            # from the same directory moved to two different places
            uninteresting.add(srcdir)
        else:
            # looks good so far
            moved[srcdir] = dstdir

    for srcdir in uninteresting:
        moved.pop(srcdir, None)
    del uninteresting

    if not moved:
        return {}, {}

    # normalize to trailing-slash prefixes for the startswith() tests below
    moved = {k + b"/": v + b"/" for k, v in pycompat.iteritems(moved)}

    for d in moved:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, moved[d])
        )

    # Sort the directories in reverse order, so we find children first.
    # For example, if dir1/ was renamed to dir2/, and dir1/subdir1/
    # was renamed to dir2/subdir2/, we want to move dir1/subdir1/file
    # to dir2/subdir2/file (not dir2/subdir1/file).
    children_first = sorted(moved, reverse=True)

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in children_first:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it
            df = moved[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return moved, movewithdir
1134
1134
1135
1135
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working contexts have no revision; use their first parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies recorded between base and c2 whose source still exists in c1
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on side 1 by basename and dirname so each
        # missing file can cheaply find its rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (hoisted out of the loop below: the value is loop-invariant)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1243
1243
1244
1244
1245 def _related(f1, f2):
1245 def _related(f1, f2):
1246 """return True if f1 and f2 filectx have a common ancestor
1246 """return True if f1 and f2 filectx have a common ancestor
1247
1247
1248 Walk back to common ancestor to see if the two files originate
1248 Walk back to common ancestor to see if the two files originate
1249 from the same file. Since workingfilectx's rev() is None it messes
1249 from the same file. Since workingfilectx's rev() is None it messes
1250 up the integer comparison logic, hence the pre-step check for
1250 up the integer comparison logic, hence the pre-step check for
1251 None (f1 and f2 can only be workingfilectx's initially).
1251 None (f1 and f2 can only be workingfilectx's initially).
1252 """
1252 """
1253
1253
1254 if f1 == f2:
1254 if f1 == f2:
1255 return True # a match
1255 return True # a match
1256
1256
1257 g1, g2 = f1.ancestors(), f2.ancestors()
1257 g1, g2 = f1.ancestors(), f2.ancestors()
1258 try:
1258 try:
1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1259 f1r, f2r = f1.linkrev(), f2.linkrev()
1260
1260
1261 if f1r is None:
1261 if f1r is None:
1262 f1 = next(g1)
1262 f1 = next(g1)
1263 if f2r is None:
1263 if f2r is None:
1264 f2 = next(g2)
1264 f2 = next(g2)
1265
1265
1266 while True:
1266 while True:
1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1267 f1r, f2r = f1.linkrev(), f2.linkrev()
1268 if f1r > f2r:
1268 if f1r > f2r:
1269 f1 = next(g1)
1269 f1 = next(g1)
1270 elif f2r > f1r:
1270 elif f2r > f1r:
1271 f2 = next(g2)
1271 f2 = next(g2)
1272 else: # f1 and f2 point to files in the same linkrev
1272 else: # f1 and f2 point to files in the same linkrev
1273 return f1 == f2 # true if they point to the same file
1273 return f1 == f2 # true if they point to the same file
1274 except StopIteration:
1274 except StopIteration:
1275 return False
1275 return False
1276
1276
1277
1277
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    # drop copies that become invalid when chained onto the working parent
    _filter(parent, wctx, new_copies)
    # Extra filtering to drop copy information for files that existed before
    # the graft. This is to handle the case of grafting a rename onto a commit
    # that already has the rename. Otherwise the presence of copy information
    # would result in the creation of an empty commit where we would prefer to
    # not create one.
    preexisting = [dest for dest in new_copies if dest in parent]
    for dest in preexisting:
        del new_copies[dest]
    # record the surviving copies directly on the working-copy file contexts
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now