##// END OF EJS Templates
copies: extract function _backwardcopies() for reversing renames...
Martin von Zweigbergk -
r47394:eca88f5f default
parent child Browse files
Show More
@@ -1,1294 +1,1300 b''
1 # coding: utf8
1 # coding: utf8
2 # copies.py - copy detection for Mercurial
2 # copies.py - copy detection for Mercurial
3 #
3 #
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import collections
11 import collections
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15 from .node import (
15 from .node import (
16 nullid,
16 nullid,
17 nullrev,
17 nullrev,
18 )
18 )
19
19
20 from . import (
20 from . import (
21 match as matchmod,
21 match as matchmod,
22 pathutil,
22 pathutil,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31 from .revlogutils import (
31 from .revlogutils import (
32 flagutil,
32 flagutil,
33 sidedata as sidedatamod,
33 sidedata as sidedatamod,
34 )
34 )
35
35
36 rustmod = policy.importrust("copy_tracing")
36 rustmod = policy.importrust("copy_tracing")
37
37
38
38
39 def _filter(src, dst, t):
39 def _filter(src, dst, t):
40 """filters out invalid copies after chaining"""
40 """filters out invalid copies after chaining"""
41
41
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 # in the following table (not including trivial cases). For example, case 6
44 # in the following table (not including trivial cases). For example, case 6
45 # is where a file existed in 'src' and remained under that name in 'mid' and
45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 # then was renamed between 'mid' and 'dst'.
46 # then was renamed between 'mid' and 'dst'.
47 #
47 #
48 # case src mid dst result
48 # case src mid dst result
49 # 1 x y - -
49 # 1 x y - -
50 # 2 x y y x->y
50 # 2 x y y x->y
51 # 3 x y x -
51 # 3 x y x -
52 # 4 x y z x->z
52 # 4 x y z x->z
53 # 5 - x y -
53 # 5 - x y -
54 # 6 x x y x->y
54 # 6 x x y x->y
55 #
55 #
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 # between 5 and 6, so it includes all cases in its result.
58 # between 5 and 6, so it includes all cases in its result.
59 # Cases 1, 3, and 5 are then removed by _filter().
59 # Cases 1, 3, and 5 are then removed by _filter().
60
60
61 for k, v in list(t.items()):
61 for k, v in list(t.items()):
62 if k == v: # case 3
62 if k == v: # case 3
63 del t[k]
63 del t[k]
64 elif v not in src: # case 5
64 elif v not in src: # case 5
65 # remove copies from files that didn't exist
65 # remove copies from files that didn't exist
66 del t[k]
66 del t[k]
67 elif k not in dst: # case 1
67 elif k not in dst: # case 1
68 # remove copies to files that were then removed
68 # remove copies to files that were then removed
69 del t[k]
69 del t[k]
70
70
71
71
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = prefix.copy()
    for dest, intermediate in pycompat.iteritems(suffix):
        # if the intermediate name was itself copied from somewhere in
        # 'prefix', collapse the two steps into a single copy record
        chained[dest] = prefix.get(intermediate, intermediate)
    return chained
78
78
79
79
80 def _tracefile(fctx, am, basemf):
80 def _tracefile(fctx, am, basemf):
81 """return file context that is the ancestor of fctx present in ancestor
81 """return file context that is the ancestor of fctx present in ancestor
82 manifest am
82 manifest am
83
83
84 Note: we used to try and stop after a given limit, however checking if that
84 Note: we used to try and stop after a given limit, however checking if that
85 limit is reached turned out to be very expensive. we are better off
85 limit is reached turned out to be very expensive. we are better off
86 disabling that feature."""
86 disabling that feature."""
87
87
88 for f in fctx.ancestors():
88 for f in fctx.ancestors():
89 path = f.path()
89 path = f.path()
90 if am.get(path, None) == f.filenode():
90 if am.get(path, None) == f.filenode():
91 return path
91 return path
92 if basemf and basemf.get(path, None) == f.filenode():
92 if basemf and basemf.get(path, None) == f.filenode():
93 return path
93 return path
94
94
95
95
96 def _dirstatecopies(repo, match=None):
96 def _dirstatecopies(repo, match=None):
97 ds = repo.dirstate
97 ds = repo.dirstate
98 c = ds.copies().copy()
98 c = ds.copies().copy()
99 for k in list(c):
99 for k in list(c):
100 if ds[k] not in b'anm' or (match and not match(k)):
100 if ds[k] not in b'anm' or (match and not match(k)):
101 del c[k]
101 del c[k]
102 return c
102 return c
103
103
104
104
105 def _computeforwardmissing(a, b, match=None):
105 def _computeforwardmissing(a, b, match=None):
106 """Computes which files are in b but not a.
106 """Computes which files are in b but not a.
107 This is its own function so extensions can easily wrap this call to see what
107 This is its own function so extensions can easily wrap this call to see what
108 files _forwardcopies is about to process.
108 files _forwardcopies is about to process.
109 """
109 """
110 ma = a.manifest()
110 ma = a.manifest()
111 mb = b.manifest()
111 mb = b.manifest()
112 return mb.filesnotin(ma, match=match)
112 return mb.filesnotin(ma, match=match)
113
113
114
114
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies changeset-centric data, no need to look
    # at the config in that case
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
122
122
123
123
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        # copy data is read from the changeset itself, not from filelogs
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}  # result: destination path -> source path

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
        # devel knob: trace every matched file in `b`, not only the ones
        # missing from `a`
        missing = list(b.walk(match))
        # _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(b'debug.copies: searching all files: %d\n' % len(missing))
    else:
        missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
        if debug:
            dbg(
                b'debug.copies: missing files to search: %d\n'
                % len(missing)
            )

    # shared across all traced files so ancestry is only computed once
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
189
189
190
190
def _revinfo_getter(repo, match):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or the raw sidedata blob when the
      Rust implementation is in use)
    """
    # NOTE: `match` is currently unused here
    cl = repo.changelog
    # bind hot-path lookups to locals once; revinfo() is called per revision
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    if rustmod is not None:
        # the Rust code consumes the raw sidedata bytes directly, so avoid
        # parsing them into a ChangingFiles object on the Python side

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
            else:
                raw = None
            return (p1, p2, raw)

    else:
        # pure-Python path: hand back a parsed ChangingFiles object

        def revinfo(rev):
            p1, p2 = parents(rev)
            if flags(rev) & HASCOPIESINFO:
                changes = changelogrevision(rev).changes
            else:
                changes = None
            return (p1, p2, changes)

    return revinfo
227
227
228
228
def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        # a strictly higher revision can never be an ancestor of a lower
        # one, and every revision is its own (inclusive) ancestor; neither
        # answer is worth caching
        if anc > desc:
            return False
        if anc == desc:
            return True
        key = (anc, desc)
        cached = cache.get(key)
        if cached is None:
            cached = cache[key] = is_ancestor(anc, desc)
        return cached

    return _is_ancestor
245
245
246
246
def _changesetforwardcopies(a, b, match):
    """compute copies between `a` and `b` using changeset-stored copy data

    Returns a {dest: source} mapping.  `a` must be an ancestor-side revision
    of `b` (the "forward" direction).
    """
    if a.rev() in (nullrev, b.rev()):
        # nothing to trace from null or from a revision to itself
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev

    # To track rename from "A" to B, we need to gather all parent → children
    # edges that are contained in `::B` but not in `::A`.
    #
    #
    # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
    # ::a`) and also all the "roots point", ie the parents of the exclusive set
    # that belong to ::a. These are exactly all the revisions needed to express
    # the parent → children we need to combine.
    #
    # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
    # excluding paths that lead to roots that are not ancestors of `a`. We
    # keep this out of the explanation because it is hard enough without this
    # special case..

    parents = cl._uncheckedparentrevs
    graph_roots = (nullrev, nullrev)

    ancestors = cl.ancestors([a.rev()], inclusive=True)
    revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    roots = set()
    has_graph_roots = False
    multi_thread = repo.ui.configbool(b'devel', b'copy-tracing.multi-thread')

    # iterate over `only(B, A)`
    for r in revs:
        ps = parents(r)
        if ps == graph_roots:
            # `r` is a root of the whole graph (both parents are null)
            has_graph_roots = True
        else:
            p1, p2 = ps

            # find all the "root points" (see larger comment above)
            if p1 != nullrev and p1 in ancestors:
                roots.add(p1)
            if p2 != nullrev and p2 in ancestors:
                roots.add(p2)
    if not roots:
        # no common revision to track copies from
        return {}
    if has_graph_roots:
        # this deals with the special case mentioned in the [1] footnote. We
        # must filter out revisions that lead to non-common graphroots.
        roots = list(roots)
        m = min(roots)
        h = [b.rev()]
        roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
        roots_to_head = set(roots_to_head)
        revs = [r for r in revs if r in roots_to_head]

    if repo.filecopiesmode == b'changeset-sidedata':
        # When using side-data, we will process the edges "from" the children.
        # We iterate over the children, gathering previously collected data
        # for the parents. To know when the parents' data is no longer
        # necessary, we keep a counter of how many children each revision has.
        #
        # An interesting property of `children_count` is that it only contains
        # revisions that will be relevant for an edge of the graph. So if a
        # child has a parent not in `children_count`, that edge should not be
        # processed.
        children_count = dict((r, 0) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children_count[r] = 0
                if p in children_count:
                    children_count[p] += 1
        revinfo = _revinfo_getter(repo, match)
        return _combine_changeset_copies(
            revs,
            children_count,
            b.rev(),
            revinfo,
            match,
            isancestor,
            multi_thread,
        )
    else:
        # When not using side-data, we will process the edges "from" the
        # parent, so we need a full mapping of the parent -> children relation.
        children = dict((r, []) for r in roots)
        for r in revs:
            for p in cl.parentrevs(r):
                if p == nullrev:
                    continue
                children[r] = []
                if p in children:
                    children[p].append(r)
        x = revs.pop()
        assert x == b.rev()
        revs.extend(roots)
        revs.sort()

        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
353
353
354
354
def _combine_changeset_copies(
    revs, children_count, targetrev, revinfo, match, isancestor, multi_thread
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children_count: a {parent: <number-of-relevant-children>} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """

    alwaysmatch = match.always()

    if rustmod is not None:
        # fast path: delegate the whole graph walk to the Rust implementation
        final_copies = rustmod.combine_changeset_copies(
            list(revs), children_count, targetrev, revinfo, multi_thread
        )
    else:
        isancestor = cached_is_ancestor(isancestor)

        # revision -> {dest: (rev, source)} data computed so far
        all_copies = {}
        # iterate over all the "children" side of copy tracing "edge"
        for current_rev in revs:
            p1, p2, changes = revinfo(current_rev)
            current_copies = None
            # iterate over all parents to chain the existing data with the
            # data from the parent → child edge.
            for parent, parent_rev in ((1, p1), (2, p2)):
                if parent_rev == nullrev:
                    continue
                remaining_children = children_count.get(parent_rev)
                if remaining_children is None:
                    # parent is not relevant for copy tracing (see
                    # _changesetforwardcopies), skip this edge
                    continue
                remaining_children -= 1
                children_count[parent_rev] = remaining_children
                if remaining_children:
                    copies = all_copies.get(parent_rev, None)
                else:
                    # last child of this parent: its data can be dropped
                    # (and safely mutated in place below)
                    copies = all_copies.pop(parent_rev, None)

                if copies is None:
                    # this is a root
                    newcopies = copies = {}
                elif remaining_children:
                    # another child still needs the parent's data untouched
                    newcopies = copies.copy()
                else:
                    newcopies = copies
                # chain the data in the edge with the existing data
                if changes is not None:
                    childcopies = {}
                    if parent == 1:
                        childcopies = changes.copied_from_p1
                    elif parent == 2:
                        childcopies = changes.copied_from_p2

                    if childcopies:
                        newcopies = copies.copy()
                        for dest, source in pycompat.iteritems(childcopies):
                            prev = copies.get(source)
                            if prev is not None and prev[1] is not None:
                                # transitive rename: collapse to the original
                                # source
                                source = prev[1]
                            newcopies[dest] = (current_rev, source)
                        assert newcopies is not copies
                    if changes.removed:
                        for f in changes.removed:
                            if f in newcopies:
                                if newcopies is copies:
                                    # copy on write to avoid affecting potential other
                                    # branches. when there are no other branches, this
                                    # could be avoided.
                                    newcopies = copies.copy()
                                # a None source records the deletion
                                newcopies[f] = (current_rev, None)
                # check potential need to combine the data from another parent (for
                # that child). See comment below for details.
                if current_copies is None:
                    current_copies = newcopies
                else:
                    # we are the second parent to work on c, we need to merge our
                    # work with the other.
                    #
                    # In case of conflict, parent 1 take precedence over parent 2.
                    # This is an arbitrary choice made anew when implementing
                    # changeset based copies. It was made without regards with
                    # potential filelog related behavior.
                    assert parent == 2
                    current_copies = _merge_copies_dict(
                        newcopies,
                        current_copies,
                        isancestor,
                        changes,
                        current_rev,
                    )
            all_copies[current_rev] = current_copies

        # filter out internal details and return a {dest: source} mapping
        final_copies = {}
        for dest, (tt, source) in all_copies[targetrev].items():
            if source is not None:
                final_copies[dest] = source
    if not alwaysmatch:
        for filename in list(final_copies.keys()):
            if not match(filename):
                del final_copies[filename]
    return final_copies
462
462
463
463
# constants returned by _compare_values() telling _merge_copies_dict which
# side ("minor" or "major") to pick for a conflicting entry
PICK_MINOR = 0
PICK_MAJOR = 1
PICK_EITHER = 2
468
468
469
469
def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
    """merge two copies-mapping together, minor and major

    In case of conflict, value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    return the resulting dict (in practice, the "minor" object, updated)
    """
    for dest, major_value in major.items():
        minor_value = minor.get(dest)
        if minor_value is None:
            # no competing entry on the minor side: take major's as-is
            minor[dest] = major_value
            continue
        pick, overwrite = _compare_values(
            changes, isancestor, dest, minor_value, major_value
        )
        if overwrite:
            # the ambiguity is resolved by the merge itself: re-stamp the
            # winning entry with the merge revision, keeping its source
            if pick == PICK_MAJOR:
                minor[dest] = (current_merge, major_value[1])
            else:
                minor[dest] = (current_merge, minor_value[1])
        elif pick == PICK_MAJOR:
            minor[dest] = major_value
    return minor
499
499
500
500
501 def _compare_values(changes, isancestor, dest, minor, major):
501 def _compare_values(changes, isancestor, dest, minor, major):
502 """compare two value within a _merge_copies_dict loop iteration
502 """compare two value within a _merge_copies_dict loop iteration
503
503
504 return (pick, overwrite).
504 return (pick, overwrite).
505
505
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
506 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
507 - overwrite is True if pick is a return of an ambiguity that needs resolution.
507 - overwrite is True if pick is a return of an ambiguity that needs resolution.
508 """
508 """
509 major_tt, major_value = major
509 major_tt, major_value = major
510 minor_tt, minor_value = minor
510 minor_tt, minor_value = minor
511
511
512 if major_tt == minor_tt:
512 if major_tt == minor_tt:
513 # if it comes from the same revision it must be the same value
513 # if it comes from the same revision it must be the same value
514 assert major_value == minor_value
514 assert major_value == minor_value
515 return PICK_EITHER, False
515 return PICK_EITHER, False
516 elif (
516 elif (
517 changes is not None
517 changes is not None
518 and minor_value is not None
518 and minor_value is not None
519 and major_value is None
519 and major_value is None
520 and dest in changes.salvaged
520 and dest in changes.salvaged
521 ):
521 ):
522 # In this case, a deletion was reverted, the "alive" value overwrite
522 # In this case, a deletion was reverted, the "alive" value overwrite
523 # the deleted one.
523 # the deleted one.
524 return PICK_MINOR, True
524 return PICK_MINOR, True
525 elif (
525 elif (
526 changes is not None
526 changes is not None
527 and major_value is not None
527 and major_value is not None
528 and minor_value is None
528 and minor_value is None
529 and dest in changes.salvaged
529 and dest in changes.salvaged
530 ):
530 ):
531 # In this case, a deletion was reverted, the "alive" value overwrite
531 # In this case, a deletion was reverted, the "alive" value overwrite
532 # the deleted one.
532 # the deleted one.
533 return PICK_MAJOR, True
533 return PICK_MAJOR, True
534 elif isancestor(minor_tt, major_tt):
534 elif isancestor(minor_tt, major_tt):
535 if changes is not None and dest in changes.merged:
535 if changes is not None and dest in changes.merged:
536 # change to dest happened on the branch without copy-source change,
536 # change to dest happened on the branch without copy-source change,
537 # so both source are valid and "major" wins.
537 # so both source are valid and "major" wins.
538 return PICK_MAJOR, True
538 return PICK_MAJOR, True
539 else:
539 else:
540 return PICK_MAJOR, False
540 return PICK_MAJOR, False
541 elif isancestor(major_tt, minor_tt):
541 elif isancestor(major_tt, minor_tt):
542 if changes is not None and dest in changes.merged:
542 if changes is not None and dest in changes.merged:
543 # change to dest happened on the branch without copy-source change,
543 # change to dest happened on the branch without copy-source change,
544 # so both source are valid and "major" wins.
544 # so both source are valid and "major" wins.
545 return PICK_MAJOR, True
545 return PICK_MAJOR, True
546 else:
546 else:
547 return PICK_MINOR, False
547 return PICK_MINOR, False
548 elif minor_value is None:
548 elif minor_value is None:
549 # in case of conflict, the "alive" side wins.
549 # in case of conflict, the "alive" side wins.
550 return PICK_MAJOR, True
550 return PICK_MAJOR, True
551 elif major_value is None:
551 elif major_value is None:
552 # in case of conflict, the "alive" side wins.
552 # in case of conflict, the "alive" side wins.
553 return PICK_MINOR, True
553 return PICK_MINOR, True
554 else:
554 else:
555 # in case of conflict where both side are alive, major wins.
555 # in case of conflict where both side are alive, major wins.
556 return PICK_MAJOR, True
556 return PICK_MAJOR, True
557
557
558
558
559 def _revinfo_getter_extra(repo):
559 def _revinfo_getter_extra(repo):
560 """return a function that return multiple data given a <rev>"i
560 """return a function that return multiple data given a <rev>"i
561
561
562 * p1: revision number of first parent
562 * p1: revision number of first parent
563 * p2: revision number of first parent
563 * p2: revision number of first parent
564 * p1copies: mapping of copies from p1
564 * p1copies: mapping of copies from p1
565 * p2copies: mapping of copies from p2
565 * p2copies: mapping of copies from p2
566 * removed: a list of removed files
566 * removed: a list of removed files
567 * ismerged: a callback to know if file was merged in that revision
567 * ismerged: a callback to know if file was merged in that revision
568 """
568 """
569 cl = repo.changelog
569 cl = repo.changelog
570 parents = cl.parentrevs
570 parents = cl.parentrevs
571
571
572 def get_ismerged(rev):
572 def get_ismerged(rev):
573 ctx = repo[rev]
573 ctx = repo[rev]
574
574
575 def ismerged(path):
575 def ismerged(path):
576 if path not in ctx.files():
576 if path not in ctx.files():
577 return False
577 return False
578 fctx = ctx[path]
578 fctx = ctx[path]
579 parents = fctx._filelog.parents(fctx._filenode)
579 parents = fctx._filelog.parents(fctx._filenode)
580 nb_parents = 0
580 nb_parents = 0
581 for n in parents:
581 for n in parents:
582 if n != nullid:
582 if n != nullid:
583 nb_parents += 1
583 nb_parents += 1
584 return nb_parents >= 2
584 return nb_parents >= 2
585
585
586 return ismerged
586 return ismerged
587
587
588 def revinfo(rev):
588 def revinfo(rev):
589 p1, p2 = parents(rev)
589 p1, p2 = parents(rev)
590 ctx = repo[rev]
590 ctx = repo[rev]
591 p1copies, p2copies = ctx._copies
591 p1copies, p2copies = ctx._copies
592 removed = ctx.filesremoved()
592 removed = ctx.filesremoved()
593 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
593 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
594
594
595 return revinfo
595 return revinfo
596
596
597
597
598 def _combine_changeset_copies_extra(
598 def _combine_changeset_copies_extra(
599 revs, children, targetrev, revinfo, match, isancestor
599 revs, children, targetrev, revinfo, match, isancestor
600 ):
600 ):
601 """version of `_combine_changeset_copies` that works with the Google
601 """version of `_combine_changeset_copies` that works with the Google
602 specific "extra" based storage for copy information"""
602 specific "extra" based storage for copy information"""
603 all_copies = {}
603 all_copies = {}
604 alwaysmatch = match.always()
604 alwaysmatch = match.always()
605 for r in revs:
605 for r in revs:
606 copies = all_copies.pop(r, None)
606 copies = all_copies.pop(r, None)
607 if copies is None:
607 if copies is None:
608 # this is a root
608 # this is a root
609 copies = {}
609 copies = {}
610 for i, c in enumerate(children[r]):
610 for i, c in enumerate(children[r]):
611 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
611 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
612 if r == p1:
612 if r == p1:
613 parent = 1
613 parent = 1
614 childcopies = p1copies
614 childcopies = p1copies
615 else:
615 else:
616 assert r == p2
616 assert r == p2
617 parent = 2
617 parent = 2
618 childcopies = p2copies
618 childcopies = p2copies
619 if not alwaysmatch:
619 if not alwaysmatch:
620 childcopies = {
620 childcopies = {
621 dst: src for dst, src in childcopies.items() if match(dst)
621 dst: src for dst, src in childcopies.items() if match(dst)
622 }
622 }
623 newcopies = copies
623 newcopies = copies
624 if childcopies:
624 if childcopies:
625 newcopies = copies.copy()
625 newcopies = copies.copy()
626 for dest, source in pycompat.iteritems(childcopies):
626 for dest, source in pycompat.iteritems(childcopies):
627 prev = copies.get(source)
627 prev = copies.get(source)
628 if prev is not None and prev[1] is not None:
628 if prev is not None and prev[1] is not None:
629 source = prev[1]
629 source = prev[1]
630 newcopies[dest] = (c, source)
630 newcopies[dest] = (c, source)
631 assert newcopies is not copies
631 assert newcopies is not copies
632 for f in removed:
632 for f in removed:
633 if f in newcopies:
633 if f in newcopies:
634 if newcopies is copies:
634 if newcopies is copies:
635 # copy on write to avoid affecting potential other
635 # copy on write to avoid affecting potential other
636 # branches. when there are no other branches, this
636 # branches. when there are no other branches, this
637 # could be avoided.
637 # could be avoided.
638 newcopies = copies.copy()
638 newcopies = copies.copy()
639 newcopies[f] = (c, None)
639 newcopies[f] = (c, None)
640 othercopies = all_copies.get(c)
640 othercopies = all_copies.get(c)
641 if othercopies is None:
641 if othercopies is None:
642 all_copies[c] = newcopies
642 all_copies[c] = newcopies
643 else:
643 else:
644 # we are the second parent to work on c, we need to merge our
644 # we are the second parent to work on c, we need to merge our
645 # work with the other.
645 # work with the other.
646 #
646 #
647 # In case of conflict, parent 1 take precedence over parent 2.
647 # In case of conflict, parent 1 take precedence over parent 2.
648 # This is an arbitrary choice made anew when implementing
648 # This is an arbitrary choice made anew when implementing
649 # changeset based copies. It was made without regards with
649 # changeset based copies. It was made without regards with
650 # potential filelog related behavior.
650 # potential filelog related behavior.
651 if parent == 1:
651 if parent == 1:
652 _merge_copies_dict_extra(
652 _merge_copies_dict_extra(
653 othercopies, newcopies, isancestor, ismerged
653 othercopies, newcopies, isancestor, ismerged
654 )
654 )
655 else:
655 else:
656 _merge_copies_dict_extra(
656 _merge_copies_dict_extra(
657 newcopies, othercopies, isancestor, ismerged
657 newcopies, othercopies, isancestor, ismerged
658 )
658 )
659 all_copies[c] = newcopies
659 all_copies[c] = newcopies
660
660
661 final_copies = {}
661 final_copies = {}
662 for dest, (tt, source) in all_copies[targetrev].items():
662 for dest, (tt, source) in all_copies[targetrev].items():
663 if source is not None:
663 if source is not None:
664 final_copies[dest] = source
664 final_copies[dest] = source
665 return final_copies
665 return final_copies
666
666
667
667
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
668 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
669 """version of `_merge_copies_dict` that works with the Google
669 """version of `_merge_copies_dict` that works with the Google
670 specific "extra" based storage for copy information"""
670 specific "extra" based storage for copy information"""
671 for dest, value in major.items():
671 for dest, value in major.items():
672 other = minor.get(dest)
672 other = minor.get(dest)
673 if other is None:
673 if other is None:
674 minor[dest] = value
674 minor[dest] = value
675 else:
675 else:
676 new_tt = value[0]
676 new_tt = value[0]
677 other_tt = other[0]
677 other_tt = other[0]
678 if value[1] == other[1]:
678 if value[1] == other[1]:
679 continue
679 continue
680 # content from "major" wins, unless it is older
680 # content from "major" wins, unless it is older
681 # than the branch point or there is a merge
681 # than the branch point or there is a merge
682 if (
682 if (
683 new_tt == other_tt
683 new_tt == other_tt
684 or not isancestor(new_tt, other_tt)
684 or not isancestor(new_tt, other_tt)
685 or ismerged(dest)
685 or ismerged(dest)
686 ):
686 ):
687 minor[dest] = value
687 minor[dest] = value
688
688
689
689
690 def _forwardcopies(a, b, base=None, match=None):
690 def _forwardcopies(a, b, base=None, match=None):
691 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
691 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
692
692
693 if base is None:
693 if base is None:
694 base = a
694 base = a
695 match = a.repo().narrowmatch(match)
695 match = a.repo().narrowmatch(match)
696 # check for working copy
696 # check for working copy
697 if b.rev() is None:
697 if b.rev() is None:
698 cm = _committedforwardcopies(a, b.p1(), base, match)
698 cm = _committedforwardcopies(a, b.p1(), base, match)
699 # combine copies from dirstate if necessary
699 # combine copies from dirstate if necessary
700 copies = _chain(cm, _dirstatecopies(b._repo, match))
700 copies = _chain(cm, _dirstatecopies(b._repo, match))
701 else:
701 else:
702 copies = _committedforwardcopies(a, b, base, match)
702 copies = _committedforwardcopies(a, b, base, match)
703 return copies
703 return copies
704
704
705
705
706 def _backwardrenames(a, b, match):
706 def _backwardrenames(a, b, match):
707 """find renames from a to b"""
707 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
708 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
708 return {}
709 return {}
709
710
711 # We don't want to pass in "match" here, since that would filter
712 # the destination by it. Since we're reversing the copies, we want
713 # to filter the source instead.
714 copies = _forwardcopies(b, a)
715 return _reverse_renames(copies, a, match)
716
717
718 def _reverse_renames(copies, dst, match):
719 """given copies to context 'dst', finds renames from that context"""
710 # Even though we're not taking copies into account, 1:n rename situations
720 # Even though we're not taking copies into account, 1:n rename situations
711 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
721 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
712 # arbitrarily pick one of the renames.
722 # arbitrarily pick one of the renames.
713 # We don't want to pass in "match" here, since that would filter
714 # the destination by it. Since we're reversing the copies, we want
715 # to filter the source instead.
716 f = _forwardcopies(b, a)
717 r = {}
723 r = {}
718 for k, v in sorted(pycompat.iteritems(f)):
724 for k, v in sorted(pycompat.iteritems(copies)):
719 if match and not match(v):
725 if match and not match(v):
720 continue
726 continue
721 # remove copies
727 # remove copies
722 if v in a:
728 if v in dst:
723 continue
729 continue
724 r[v] = k
730 r[v] = k
725 return r
731 return r
726
732
727
733
728 def pathcopies(x, y, match=None):
734 def pathcopies(x, y, match=None):
729 """find {dst@y: src@x} copy mapping for directed compare"""
735 """find {dst@y: src@x} copy mapping for directed compare"""
730 repo = x._repo
736 repo = x._repo
731 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
737 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
732 if debug:
738 if debug:
733 repo.ui.debug(
739 repo.ui.debug(
734 b'debug.copies: searching copies from %s to %s\n' % (x, y)
740 b'debug.copies: searching copies from %s to %s\n' % (x, y)
735 )
741 )
736 if x == y or not x or not y:
742 if x == y or not x or not y:
737 return {}
743 return {}
738 if y.rev() is None and x == y.p1():
744 if y.rev() is None and x == y.p1():
739 if debug:
745 if debug:
740 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
746 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
741 # short-circuit to avoid issues with merge states
747 # short-circuit to avoid issues with merge states
742 return _dirstatecopies(repo, match)
748 return _dirstatecopies(repo, match)
743 a = y.ancestor(x)
749 a = y.ancestor(x)
744 if a == x:
750 if a == x:
745 if debug:
751 if debug:
746 repo.ui.debug(b'debug.copies: search mode: forward\n')
752 repo.ui.debug(b'debug.copies: search mode: forward\n')
747 copies = _forwardcopies(x, y, match=match)
753 copies = _forwardcopies(x, y, match=match)
748 elif a == y:
754 elif a == y:
749 if debug:
755 if debug:
750 repo.ui.debug(b'debug.copies: search mode: backward\n')
756 repo.ui.debug(b'debug.copies: search mode: backward\n')
751 copies = _backwardrenames(x, y, match=match)
757 copies = _backwardrenames(x, y, match=match)
752 else:
758 else:
753 if debug:
759 if debug:
754 repo.ui.debug(b'debug.copies: search mode: combined\n')
760 repo.ui.debug(b'debug.copies: search mode: combined\n')
755 base = None
761 base = None
756 if a.rev() != nullrev:
762 if a.rev() != nullrev:
757 base = x
763 base = x
758 copies = _chain(
764 copies = _chain(
759 _backwardrenames(x, a, match=match),
765 _backwardrenames(x, a, match=match),
760 _forwardcopies(a, y, base, match=match),
766 _forwardcopies(a, y, base, match=match),
761 )
767 )
762 _filter(x, y, copies)
768 _filter(x, y, copies)
763 return copies
769 return copies
764
770
765
771
766 def mergecopies(repo, c1, c2, base):
772 def mergecopies(repo, c1, c2, base):
767 """
773 """
768 Finds moves and copies between context c1 and c2 that are relevant for
774 Finds moves and copies between context c1 and c2 that are relevant for
769 merging. 'base' will be used as the merge base.
775 merging. 'base' will be used as the merge base.
770
776
771 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
777 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
772 files that were moved/ copied in one merge parent and modified in another.
778 files that were moved/ copied in one merge parent and modified in another.
773 For example:
779 For example:
774
780
775 o ---> 4 another commit
781 o ---> 4 another commit
776 |
782 |
777 | o ---> 3 commit that modifies a.txt
783 | o ---> 3 commit that modifies a.txt
778 | /
784 | /
779 o / ---> 2 commit that moves a.txt to b.txt
785 o / ---> 2 commit that moves a.txt to b.txt
780 |/
786 |/
781 o ---> 1 merge base
787 o ---> 1 merge base
782
788
783 If we try to rebase revision 3 on revision 4, since there is no a.txt in
789 If we try to rebase revision 3 on revision 4, since there is no a.txt in
784 revision 4, and if user have copytrace disabled, we prints the following
790 revision 4, and if user have copytrace disabled, we prints the following
785 message:
791 message:
786
792
787 ```other changed <file> which local deleted```
793 ```other changed <file> which local deleted```
788
794
789 Returns a tuple where:
795 Returns a tuple where:
790
796
791 "branch_copies" an instance of branch_copies.
797 "branch_copies" an instance of branch_copies.
792
798
793 "diverge" is a mapping of source name -> list of destination names
799 "diverge" is a mapping of source name -> list of destination names
794 for divergent renames.
800 for divergent renames.
795
801
796 This function calls different copytracing algorithms based on config.
802 This function calls different copytracing algorithms based on config.
797 """
803 """
798 # avoid silly behavior for update from empty dir
804 # avoid silly behavior for update from empty dir
799 if not c1 or not c2 or c1 == c2:
805 if not c1 or not c2 or c1 == c2:
800 return branch_copies(), branch_copies(), {}
806 return branch_copies(), branch_copies(), {}
801
807
802 narrowmatch = c1.repo().narrowmatch()
808 narrowmatch = c1.repo().narrowmatch()
803
809
804 # avoid silly behavior for parent -> working dir
810 # avoid silly behavior for parent -> working dir
805 if c2.node() is None and c1.node() == repo.dirstate.p1():
811 if c2.node() is None and c1.node() == repo.dirstate.p1():
806 return (
812 return (
807 branch_copies(_dirstatecopies(repo, narrowmatch)),
813 branch_copies(_dirstatecopies(repo, narrowmatch)),
808 branch_copies(),
814 branch_copies(),
809 {},
815 {},
810 )
816 )
811
817
812 copytracing = repo.ui.config(b'experimental', b'copytrace')
818 copytracing = repo.ui.config(b'experimental', b'copytrace')
813 if stringutil.parsebool(copytracing) is False:
819 if stringutil.parsebool(copytracing) is False:
814 # stringutil.parsebool() returns None when it is unable to parse the
820 # stringutil.parsebool() returns None when it is unable to parse the
815 # value, so we should rely on making sure copytracing is on such cases
821 # value, so we should rely on making sure copytracing is on such cases
816 return branch_copies(), branch_copies(), {}
822 return branch_copies(), branch_copies(), {}
817
823
818 if usechangesetcentricalgo(repo):
824 if usechangesetcentricalgo(repo):
819 # The heuristics don't make sense when we need changeset-centric algos
825 # The heuristics don't make sense when we need changeset-centric algos
820 return _fullcopytracing(repo, c1, c2, base)
826 return _fullcopytracing(repo, c1, c2, base)
821
827
822 # Copy trace disabling is explicitly below the node == p1 logic above
828 # Copy trace disabling is explicitly below the node == p1 logic above
823 # because the logic above is required for a simple copy to be kept across a
829 # because the logic above is required for a simple copy to be kept across a
824 # rebase.
830 # rebase.
825 if copytracing == b'heuristics':
831 if copytracing == b'heuristics':
826 # Do full copytracing if only non-public revisions are involved as
832 # Do full copytracing if only non-public revisions are involved as
827 # that will be fast enough and will also cover the copies which could
833 # that will be fast enough and will also cover the copies which could
828 # be missed by heuristics
834 # be missed by heuristics
829 if _isfullcopytraceable(repo, c1, base):
835 if _isfullcopytraceable(repo, c1, base):
830 return _fullcopytracing(repo, c1, c2, base)
836 return _fullcopytracing(repo, c1, c2, base)
831 return _heuristicscopytracing(repo, c1, c2, base)
837 return _heuristicscopytracing(repo, c1, c2, base)
832 else:
838 else:
833 return _fullcopytracing(repo, c1, c2, base)
839 return _fullcopytracing(repo, c1, c2, base)
834
840
835
841
836 def _isfullcopytraceable(repo, c1, base):
842 def _isfullcopytraceable(repo, c1, base):
837 """Checks that if base, source and destination are all no-public branches,
843 """Checks that if base, source and destination are all no-public branches,
838 if yes let's use the full copytrace algorithm for increased capabilities
844 if yes let's use the full copytrace algorithm for increased capabilities
839 since it will be fast enough.
845 since it will be fast enough.
840
846
841 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
847 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
842 number of changesets from c1 to base such that if number of changesets are
848 number of changesets from c1 to base such that if number of changesets are
843 more than the limit, full copytracing algorithm won't be used.
849 more than the limit, full copytracing algorithm won't be used.
844 """
850 """
845 if c1.rev() is None:
851 if c1.rev() is None:
846 c1 = c1.p1()
852 c1 = c1.p1()
847 if c1.mutable() and base.mutable():
853 if c1.mutable() and base.mutable():
848 sourcecommitlimit = repo.ui.configint(
854 sourcecommitlimit = repo.ui.configint(
849 b'experimental', b'copytrace.sourcecommitlimit'
855 b'experimental', b'copytrace.sourcecommitlimit'
850 )
856 )
851 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
857 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
852 return commits < sourcecommitlimit
858 return commits < sourcecommitlimit
853 return False
859 return False
854
860
855
861
856 def _checksinglesidecopies(
862 def _checksinglesidecopies(
857 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
863 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
858 ):
864 ):
859 if src not in m2:
865 if src not in m2:
860 # deleted on side 2
866 # deleted on side 2
861 if src not in m1:
867 if src not in m1:
862 # renamed on side 1, deleted on side 2
868 # renamed on side 1, deleted on side 2
863 renamedelete[src] = dsts1
869 renamedelete[src] = dsts1
864 elif src not in mb:
870 elif src not in mb:
865 # Work around the "short-circuit to avoid issues with merge states"
871 # Work around the "short-circuit to avoid issues with merge states"
866 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
872 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
867 # destination doesn't exist in y.
873 # destination doesn't exist in y.
868 pass
874 pass
869 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
875 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
870 return
876 return
871 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
877 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
872 # modified on side 2
878 # modified on side 2
873 for dst in dsts1:
879 for dst in dsts1:
874 copy[dst] = src
880 copy[dst] = src
875
881
876
882
877 class branch_copies(object):
883 class branch_copies(object):
878 """Information about copies made on one side of a merge/graft.
884 """Information about copies made on one side of a merge/graft.
879
885
880 "copy" is a mapping from destination name -> source name,
886 "copy" is a mapping from destination name -> source name,
881 where source is in c1 and destination is in c2 or vice-versa.
887 where source is in c1 and destination is in c2 or vice-versa.
882
888
883 "movewithdir" is a mapping from source name -> destination name,
889 "movewithdir" is a mapping from source name -> destination name,
884 where the file at source present in one context but not the other
890 where the file at source present in one context but not the other
885 needs to be moved to destination by the merge process, because the
891 needs to be moved to destination by the merge process, because the
886 other context moved the directory it is in.
892 other context moved the directory it is in.
887
893
888 "renamedelete" is a mapping of source name -> list of destination
894 "renamedelete" is a mapping of source name -> list of destination
889 names for files deleted in c1 that were renamed in c2 or vice-versa.
895 names for files deleted in c1 that were renamed in c2 or vice-versa.
890
896
891 "dirmove" is a mapping of detected source dir -> destination dir renames.
897 "dirmove" is a mapping of detected source dir -> destination dir renames.
892 This is needed for handling changes to new files previously grafted into
898 This is needed for handling changes to new files previously grafted into
893 renamed directories.
899 renamed directories.
894 """
900 """
895
901
896 def __init__(
902 def __init__(
897 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
903 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
898 ):
904 ):
899 self.copy = {} if copy is None else copy
905 self.copy = {} if copy is None else copy
900 self.renamedelete = {} if renamedelete is None else renamedelete
906 self.renamedelete = {} if renamedelete is None else renamedelete
901 self.dirmove = {} if dirmove is None else dirmove
907 self.dirmove = {} if dirmove is None else dirmove
902 self.movewithdir = {} if movewithdir is None else movewithdir
908 self.movewithdir = {} if movewithdir is None else movewithdir
903
909
904 def __repr__(self):
910 def __repr__(self):
905 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
911 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
906 self.copy,
912 self.copy,
907 self.renamedelete,
913 self.renamedelete,
908 self.dirmove,
914 self.dirmove,
909 self.movewithdir,
915 self.movewithdir,
910 )
916 )
911
917
912
918
913 def _fullcopytracing(repo, c1, c2, base):
919 def _fullcopytracing(repo, c1, c2, base):
914 """The full copytracing algorithm which finds all the new files that were
920 """The full copytracing algorithm which finds all the new files that were
915 added from merge base up to the top commit and for each file it checks if
921 added from merge base up to the top commit and for each file it checks if
916 this file was copied from another file.
922 this file was copied from another file.
917
923
918 This is pretty slow when a lot of changesets are involved but will track all
924 This is pretty slow when a lot of changesets are involved but will track all
919 the copies.
925 the copies.
920 """
926 """
921 m1 = c1.manifest()
927 m1 = c1.manifest()
922 m2 = c2.manifest()
928 m2 = c2.manifest()
923 mb = base.manifest()
929 mb = base.manifest()
924
930
925 copies1 = pathcopies(base, c1)
931 copies1 = pathcopies(base, c1)
926 copies2 = pathcopies(base, c2)
932 copies2 = pathcopies(base, c2)
927
933
928 if not (copies1 or copies2):
934 if not (copies1 or copies2):
929 return branch_copies(), branch_copies(), {}
935 return branch_copies(), branch_copies(), {}
930
936
931 inversecopies1 = {}
937 inversecopies1 = {}
932 inversecopies2 = {}
938 inversecopies2 = {}
933 for dst, src in copies1.items():
939 for dst, src in copies1.items():
934 inversecopies1.setdefault(src, []).append(dst)
940 inversecopies1.setdefault(src, []).append(dst)
935 for dst, src in copies2.items():
941 for dst, src in copies2.items():
936 inversecopies2.setdefault(src, []).append(dst)
942 inversecopies2.setdefault(src, []).append(dst)
937
943
938 copy1 = {}
944 copy1 = {}
939 copy2 = {}
945 copy2 = {}
940 diverge = {}
946 diverge = {}
941 renamedelete1 = {}
947 renamedelete1 = {}
942 renamedelete2 = {}
948 renamedelete2 = {}
943 allsources = set(inversecopies1) | set(inversecopies2)
949 allsources = set(inversecopies1) | set(inversecopies2)
944 for src in allsources:
950 for src in allsources:
945 dsts1 = inversecopies1.get(src)
951 dsts1 = inversecopies1.get(src)
946 dsts2 = inversecopies2.get(src)
952 dsts2 = inversecopies2.get(src)
947 if dsts1 and dsts2:
953 if dsts1 and dsts2:
948 # copied/renamed on both sides
954 # copied/renamed on both sides
949 if src not in m1 and src not in m2:
955 if src not in m1 and src not in m2:
950 # renamed on both sides
956 # renamed on both sides
951 dsts1 = set(dsts1)
957 dsts1 = set(dsts1)
952 dsts2 = set(dsts2)
958 dsts2 = set(dsts2)
953 # If there's some overlap in the rename destinations, we
959 # If there's some overlap in the rename destinations, we
954 # consider it not divergent. For example, if side 1 copies 'a'
960 # consider it not divergent. For example, if side 1 copies 'a'
955 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
961 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
956 # and 'd' and deletes 'a'.
962 # and 'd' and deletes 'a'.
957 if dsts1 & dsts2:
963 if dsts1 & dsts2:
958 for dst in dsts1 & dsts2:
964 for dst in dsts1 & dsts2:
959 copy1[dst] = src
965 copy1[dst] = src
960 copy2[dst] = src
966 copy2[dst] = src
961 else:
967 else:
962 diverge[src] = sorted(dsts1 | dsts2)
968 diverge[src] = sorted(dsts1 | dsts2)
963 elif src in m1 and src in m2:
969 elif src in m1 and src in m2:
964 # copied on both sides
970 # copied on both sides
965 dsts1 = set(dsts1)
971 dsts1 = set(dsts1)
966 dsts2 = set(dsts2)
972 dsts2 = set(dsts2)
967 for dst in dsts1 & dsts2:
973 for dst in dsts1 & dsts2:
968 copy1[dst] = src
974 copy1[dst] = src
969 copy2[dst] = src
975 copy2[dst] = src
970 # TODO: Handle cases where it was renamed on one side and copied
976 # TODO: Handle cases where it was renamed on one side and copied
971 # on the other side
977 # on the other side
972 elif dsts1:
978 elif dsts1:
973 # copied/renamed only on side 1
979 # copied/renamed only on side 1
974 _checksinglesidecopies(
980 _checksinglesidecopies(
975 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
981 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
976 )
982 )
977 elif dsts2:
983 elif dsts2:
978 # copied/renamed only on side 2
984 # copied/renamed only on side 2
979 _checksinglesidecopies(
985 _checksinglesidecopies(
980 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
986 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
981 )
987 )
982
988
983 # find interesting file sets from manifests
989 # find interesting file sets from manifests
984 cache = []
990 cache = []
985
991
986 def _get_addedfiles(idx):
992 def _get_addedfiles(idx):
987 if not cache:
993 if not cache:
988 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
994 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
989 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
995 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
990 u1 = sorted(addedinm1 - addedinm2)
996 u1 = sorted(addedinm1 - addedinm2)
991 u2 = sorted(addedinm2 - addedinm1)
997 u2 = sorted(addedinm2 - addedinm1)
992 cache.extend((u1, u2))
998 cache.extend((u1, u2))
993 return cache[idx]
999 return cache[idx]
994
1000
995 u1fn = lambda: _get_addedfiles(0)
1001 u1fn = lambda: _get_addedfiles(0)
996 u2fn = lambda: _get_addedfiles(1)
1002 u2fn = lambda: _get_addedfiles(1)
997 if repo.ui.debugflag:
1003 if repo.ui.debugflag:
998 u1 = u1fn()
1004 u1 = u1fn()
999 u2 = u2fn()
1005 u2 = u2fn()
1000
1006
1001 header = b" unmatched files in %s"
1007 header = b" unmatched files in %s"
1002 if u1:
1008 if u1:
1003 repo.ui.debug(
1009 repo.ui.debug(
1004 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1010 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
1005 )
1011 )
1006 if u2:
1012 if u2:
1007 repo.ui.debug(
1013 repo.ui.debug(
1008 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1014 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1009 )
1015 )
1010
1016
1011 renamedeleteset = set()
1017 renamedeleteset = set()
1012 divergeset = set()
1018 divergeset = set()
1013 for dsts in diverge.values():
1019 for dsts in diverge.values():
1014 divergeset.update(dsts)
1020 divergeset.update(dsts)
1015 for dsts in renamedelete1.values():
1021 for dsts in renamedelete1.values():
1016 renamedeleteset.update(dsts)
1022 renamedeleteset.update(dsts)
1017 for dsts in renamedelete2.values():
1023 for dsts in renamedelete2.values():
1018 renamedeleteset.update(dsts)
1024 renamedeleteset.update(dsts)
1019
1025
1020 repo.ui.debug(
1026 repo.ui.debug(
1021 b" all copies found (* = to merge, ! = divergent, "
1027 b" all copies found (* = to merge, ! = divergent, "
1022 b"% = renamed and deleted):\n"
1028 b"% = renamed and deleted):\n"
1023 )
1029 )
1024 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1030 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1025 if not copies:
1031 if not copies:
1026 continue
1032 continue
1027 repo.ui.debug(b" on %s side:\n" % side)
1033 repo.ui.debug(b" on %s side:\n" % side)
1028 for f in sorted(copies):
1034 for f in sorted(copies):
1029 note = b""
1035 note = b""
1030 if f in copy1 or f in copy2:
1036 if f in copy1 or f in copy2:
1031 note += b"*"
1037 note += b"*"
1032 if f in divergeset:
1038 if f in divergeset:
1033 note += b"!"
1039 note += b"!"
1034 if f in renamedeleteset:
1040 if f in renamedeleteset:
1035 note += b"%"
1041 note += b"%"
1036 repo.ui.debug(
1042 repo.ui.debug(
1037 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1043 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1038 )
1044 )
1039 del renamedeleteset
1045 del renamedeleteset
1040 del divergeset
1046 del divergeset
1041
1047
1042 repo.ui.debug(b" checking for directory renames\n")
1048 repo.ui.debug(b" checking for directory renames\n")
1043
1049
1044 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1050 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1045 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1051 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1046
1052
1047 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1053 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1048 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1054 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1049
1055
1050 return branch_copies1, branch_copies2, diverge
1056 return branch_copies1, branch_copies2, diverge
1051
1057
1052
1058
def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
    """Detect wholesale directory renames and files that should follow them.

    repo: the repository (used for debug output)
    ctx: the context for one of the merge sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
        merge.manifestmerge() won't care about
    addedfilesfn: zero-argument callable returning the files added on the
        other side (compared to ctx)

    Returns a pair (dirmove, movewithdir): dirmove maps moved source
    directories (trailing-slash form) to their destinations, and
    movewithdir maps newly added files to where they should move because
    their directory moved.
    """
    # A directory counts as moved when every file in it was copied away and
    # all the copies out of it agree on a single destination directory.
    uninteresting = set()
    dirmove = {}
    for dst, src in pycompat.iteritems(fullcopy):
        srcdir = pathutil.dirname(src)
        dstdir = pathutil.dirname(dst)
        if srcdir in uninteresting:
            # already ruled out
            continue
        if ctx.hasdir(srcdir) and ctx.hasdir(dstdir):
            # the source directory still exists, so it wasn't entirely moved
            uninteresting.add(srcdir)
        elif dirmove.get(srcdir, dstdir) != dstdir:
            # copies out of the same directory disagree on the destination
            uninteresting.add(srcdir)
        else:
            # looks like a directory rename so far
            dirmove[srcdir] = dstdir

    for d in uninteresting:
        dirmove.pop(d, None)
    del uninteresting

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so the prefix matching below cannot
    # accidentally match a sibling directory with a common name prefix
    dirmove = {
        srcdir + b"/": dstdir + b"/"
        for srcdir, dstdir in pycompat.iteritems(dirmove)
    }

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    # Visit longer (child) directory names before their parents: if dir1/
    # was renamed to dir2/ and dir1/subdir1/ to dir2/subdir2/, a file added
    # in dir1/subdir1/ must map to dir2/subdir2/file, not dir2/subdir1/file.
    dirmove_children_first = sorted(dirmove, reverse=True)

    # check unaccounted nonoverlapping files against directory moves
    movewithdir = {}
    for f in addedfilesfn():
        if f in fullcopy:
            continue
        for d in dirmove_children_first:
            if not f.startswith(d):
                continue
            # new file added in a directory that was moved, move it along
            df = dirmove[d] + f[len(d) :]
            if df not in copy:
                movewithdir[f] = df
                repo.ui.debug(
                    b"   pending file src: '%s' -> dst: '%s'\n" % (f, df)
                )
            break

    return dirmove, movewithdir
1122
1128
1123
1129
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics.

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch",
    i.e. the commits from base up to c2, not including base. If a merge is
    involved it falls back to _fullcopytracing().

    Can be enabled with the following config:

        [experimental]
        copytrace = heuristics

    The copy/move candidates found by the heuristics can sometimes be very
    numerous, which makes the algorithm slow. The number of candidates to
    check is capped by the config
    `experimental.copytrace.movecandidateslimit`, which defaults to 100.
    """
    # working-copy contexts have no revision number; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    m1 = c1.manifest()

    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm if the walk runs into a merge commit
    changedfiles = set()
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies made on the c2 side whose source still exists on the c1 side
    copies2 = {
        dst: src
        for dst, src in pycompat.iteritems(_forwardcopies(base, c2))
        if src in m1
    }

    # A file is "missing" if it isn't present in the destination, but is
    # present in the base and present in the source. Presence in the base
    # excludes added files; presence in the source excludes removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # index the files added on the c1 side by basename and by dirname —
        # the two axes the heuristics match rename candidates on
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            basenametofilename[os.path.basename(f)].append(f)
            dirnametofilename[os.path.dirname(f)].append(f)

        for f in missingfiles:
            candidates = (
                basenametofilename[os.path.basename(f)]
                + dirnametofilename[os.path.dirname(f)]
            )
            # f is guaranteed to be present in c2, so c2.filectx(f) is safe
            f2 = c2.filectx(f)
            # too many candidates slows the heuristics down; the config
            # value below caps how many we are willing to check per file
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(candidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(candidates))
                )
                continue

            for candidate in candidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1231
1237
1232
1238
1233 def _related(f1, f2):
1239 def _related(f1, f2):
1234 """return True if f1 and f2 filectx have a common ancestor
1240 """return True if f1 and f2 filectx have a common ancestor
1235
1241
1236 Walk back to common ancestor to see if the two files originate
1242 Walk back to common ancestor to see if the two files originate
1237 from the same file. Since workingfilectx's rev() is None it messes
1243 from the same file. Since workingfilectx's rev() is None it messes
1238 up the integer comparison logic, hence the pre-step check for
1244 up the integer comparison logic, hence the pre-step check for
1239 None (f1 and f2 can only be workingfilectx's initially).
1245 None (f1 and f2 can only be workingfilectx's initially).
1240 """
1246 """
1241
1247
1242 if f1 == f2:
1248 if f1 == f2:
1243 return True # a match
1249 return True # a match
1244
1250
1245 g1, g2 = f1.ancestors(), f2.ancestors()
1251 g1, g2 = f1.ancestors(), f2.ancestors()
1246 try:
1252 try:
1247 f1r, f2r = f1.linkrev(), f2.linkrev()
1253 f1r, f2r = f1.linkrev(), f2.linkrev()
1248
1254
1249 if f1r is None:
1255 if f1r is None:
1250 f1 = next(g1)
1256 f1 = next(g1)
1251 if f2r is None:
1257 if f2r is None:
1252 f2 = next(g2)
1258 f2 = next(g2)
1253
1259
1254 while True:
1260 while True:
1255 f1r, f2r = f1.linkrev(), f2.linkrev()
1261 f1r, f2r = f1.linkrev(), f2.linkrev()
1256 if f1r > f2r:
1262 if f1r > f2r:
1257 f1 = next(g1)
1263 f1 = next(g1)
1258 elif f2r > f1r:
1264 elif f2r > f1r:
1259 f2 = next(g2)
1265 f2 = next(g2)
1260 else: # f1 and f2 point to files in the same linkrev
1266 else: # f1 and f2 point to files in the same linkrev
1261 return f1 == f2 # true if they point to the same file
1267 return f1 == f2 # true if they point to the same file
1262 except StopIteration:
1268 except StopIteration:
1263 return False
1269 return False
1264
1270
1265
1271
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this only considers copies between base and ctx;
    copies between base and wctx are ignored. Also unlike mergecopies(),
    the copies are applied directly to the working copy instead of being
    returned as information about them. That makes it cheaper (especially
    in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but only when it
    thinks the source files are related (see merge._related()), and never
    when the file wasn't modified on the local side. This function adds
    the copies that were "missed" by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    parent = wctx.p1()
    _filter(parent, wctx, new_copies)
    # Drop copy information for destinations that already existed before
    # the graft. This handles grafting a rename onto a commit that already
    # has the rename; keeping the copy metadata would otherwise create an
    # empty commit where we would prefer not to create one.
    for dst in list(new_copies):
        if dst in parent:
            del new_copies[dst]
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now