copies-rust: move is_ancestor caching within the rust code...
marmoute
r46586:8b99c473 default
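This commit stops wrapping `cl.isancestorrev` in `cached_is_ancestor()` inside `_changesetforwardcopies()` and instead applies the wrapper in `_combine_changeset_copies()`, only after the Rust fast path (`rustmod.combine_changeset_copies`) has been ruled out. That way the Rust extension receives the raw callable and can cache on its own side, while the pure-Python fallback keeps the memoization it had before. As a quick illustration, here is a minimal, self-contained sketch of the memoization pattern being relocated; `cached_is_ancestor()` is copied from `copies.py` below, while `fake_is_ancestor` is a hypothetical stand-in for `cl.isancestorrev` and not part of Mercurial's API:

    # Sketch: the caching wrapper this patch relocates, exercised with a
    # hypothetical ancestry callable instead of a real changelog.

    def cached_is_ancestor(is_ancestor):
        """return a cached version of is_ancestor"""
        cache = {}

        def _is_ancestor(anc, desc):
            # cheap revision-number checks skip both the cache and the
            # expensive graph walk (an ancestor's revnum is never higher)
            if anc > desc:
                return False
            elif anc == desc:
                return True
            key = (anc, desc)
            ret = cache.get(key)
            if ret is None:
                ret = cache[key] = is_ancestor(anc, desc)
            return ret

        return _is_ancestor


    calls = []

    def fake_is_ancestor(anc, desc):
        # stand-in for cl.isancestorrev; records how often the underlying
        # (expensive) check actually runs
        calls.append((anc, desc))
        return True

    wrapped = cached_is_ancestor(fake_is_ancestor)
    assert wrapped(2, 7) and wrapped(2, 7)  # second call is served from cache
    assert len(calls) == 1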
@@ -1,1142 +1,1144 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from . import (
    match as matchmod,
    node,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import flagutil

rustmod = policy.importrust("copy_tracing")


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    #   case  src  mid  dst  result
    #    1     x    y    -     -
    #    2     x    y    y    x->y
    #    3     x    y    x     -
    #    4     x    y    z    x->z
    #    5     -    x    y     -
    #    6     x    x    y    x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]


def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result


def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Note: we used to try to stop after a given limit, but checking whether that
    limit was reached turned out to be very expensive, so we are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource


def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm


def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of the first parent
    * p2: revision number of the second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask for information about a
    # revision for one parent, we'll later ask for it for the other. So it
    # makes sense to keep the information around when reaching the first
    # parent of a merge and to drop it once it has been provided for the
    # second parent.
    #
    # There are cases where only one parent of a merge will be walked. This
    # happens when the "destination" of the copy tracing is a descendant of
    # a new root that is not common with the "source". In that case, we will
    # only walk through merge parents that are descendants of changesets
    # common between "source" and "destination".
    #
    # With the current implementation, if such changesets carry copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach a pathological case where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this ever becomes an issue, one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        p1, p2 = parents(rev)
        value = None
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX in some cases we over-cache; this is ignored for now
            merge_caches[rev] = value
        return value

    return revinfo


def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor


def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
-    isancestor = cached_is_ancestor(cl.isancestorrev)
+    isancestor = cl.isancestorrev
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )


def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
306 """combine the copies information for each item of iterrevs
306 """combine the copies information for each item of iterrevs
307
307
308 revs: sorted iterable of revision to visit
308 revs: sorted iterable of revision to visit
309 children: a {parent: [children]} mapping.
309 children: a {parent: [children]} mapping.
310 targetrev: the final copies destination revision (not in iterrevs)
310 targetrev: the final copies destination revision (not in iterrevs)
311 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
311 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
312 match: a matcher
312 match: a matcher
313
313
314 It returns the aggregated copies information for `targetrev`.
314 It returns the aggregated copies information for `targetrev`.
315 """
315 """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

+    isancestor = cached_is_ancestor(isancestor)
+
    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, so we need to merge
                # our work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copies mappings together, minor and major

    In case of conflict, the value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: a callable returning True if `low_rev`
      is an ancestor of `high_rev`,

    - `ismerged(path)`: a callable returning True if `path` has been merged
      in the current revision,
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
            elif isancestor(other_tt, new_tt):
                minor[dest] = value


def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>:

    * p1: revision number of the first parent
    * p2: revision number of the second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if a file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo


def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
479 """version of `_combine_changeset_copies` that works with the Google
481 """version of `_combine_changeset_copies` that works with the Google
480 specific "extra" based storage for copy information"""
482 specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, so we need to merge
                # our work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
547 """version of `_merge_copies_dict` that works with the Google
549 """version of `_merge_copies_dict` that works with the Google
548 specific "extra" based storage for copy information"""
550 specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r


def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, and unshelve to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" is an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
    """Checks whether base, source and destination are all non-public; if so,
    the full copytrace algorithm is used for increased capabilities, since it
    will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src


class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source, present in one context but not the other,
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )


def _fullcopytracing(repo, c1, c2, base):
    """The full copytracing algorithm, which finds all the new files that were
    added from the merge base up to the top commit and, for each file, checks
    whether it was copied from another file.

    This is pretty slow when a lot of changesets are involved, but it will
    track all the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', then 'c' is treated as a plain copy
                # on both sides rather than a divergence.
835 if dsts1 & dsts2:
837 if dsts1 & dsts2:
836 for dst in dsts1 & dsts2:
838 for dst in dsts1 & dsts2:
837 copy1[dst] = src
839 copy1[dst] = src
838 copy2[dst] = src
840 copy2[dst] = src
839 else:
841 else:
840 diverge[src] = sorted(dsts1 | dsts2)
842 diverge[src] = sorted(dsts1 | dsts2)
841 elif src in m1 and src in m2:
843 elif src in m1 and src in m2:
842 # copied on both sides
844 # copied on both sides
843 dsts1 = set(dsts1)
845 dsts1 = set(dsts1)
844 dsts2 = set(dsts2)
846 dsts2 = set(dsts2)
845 for dst in dsts1 & dsts2:
847 for dst in dsts1 & dsts2:
846 copy1[dst] = src
848 copy1[dst] = src
847 copy2[dst] = src
849 copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"  on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
              merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir


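To make the directory-move inference above concrete, here is a minimal, self-contained sketch of the same first pass (the helper name infer_dir_moves is hypothetical and not part of copies.py; plain strings and os.path.dirname stand in for contexts and pathutil):

import os

def infer_dir_moves(fullcopy, existing_dirs):
    """Toy version of the first pass of _dir_renames.

    fullcopy: {dst: src} file copies on one side
    existing_dirs: directories still present in that context
    """
    invalid = set()
    dirmove = {}
    for dst, src in fullcopy.items():
        dsrc, ddst = os.path.dirname(src), os.path.dirname(dst)
        if dsrc in invalid:
            continue  # already known to be uninteresting
        elif dsrc in existing_dirs and ddst in existing_dirs:
            invalid.add(dsrc)  # source dir wasn't entirely moved
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            invalid.add(dsrc)  # moved to two different places
        else:
            dirmove[dsrc] = ddst
    return {k: v for k, v in dirmove.items() if k not in invalid}

# 'a/f' and 'a/g' both moved into 'b/', and 'a' no longer exists,
# so the directory move 'a' -> 'b' is inferred:
assert infer_dir_moves(
    {'b/f': 'a/f', 'b/g': 'a/g'}, existing_dirs={'b'}
) == {'a': 'b'}
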
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    The source branch is the commits from base up to c2, not including base.

    If a merge is involved, it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the number of copy/move candidates found by the heuristics
    can be very large, which will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # A file is missing if it isn't present in the destination, but is present
    # in the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    copies1 = {}
    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # We can have a lot of candidates, which can slow down the
            # heuristics; a config value limits the number of candidate
            # moves to check.
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}


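The candidate generation above boils down to two indexes over the files added on the local side. A standalone sketch of just that lookup (the helper name move_candidates is hypothetical, not part of copies.py):

import collections
import os

def move_candidates(added_on_local, missing_file):
    """Toy version of the candidate lookup in _heuristicscopytracing:
    files added locally that share a basename or a directory with a
    file that went missing on the other side."""
    by_basename = collections.defaultdict(list)
    by_dirname = collections.defaultdict(list)
    for f in added_on_local:
        by_basename[os.path.basename(f)].append(f)
        by_dirname[os.path.dirname(f)].append(f)
    return (
        by_basename[os.path.basename(missing_file)]
        + by_dirname[os.path.dirname(missing_file)]
    )

# 'new/util.py' is a rename candidate for 'old/util.py' (same basename),
# and 'old/helpers.py' is one too (same directory):
print(move_candidates(['new/util.py', 'old/helpers.py'], 'old/util.py'))
# -> ['new/util.py', 'old/helpers.py']
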
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True  # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else:  # f1 and f2 point to files in the same linkrev
                return f1 == f2  # true if they point to the same file
    except StopIteration:
        return False


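The loop in _related is a two-pointer walk: whichever side currently has the larger linkrev steps back to its next ancestor, until both sides land on the same revision. A minimal sketch over plain (linkrev, file_id) pairs, assuming each history is ordered by decreasing linkrev (the helper name related is hypothetical):

def related(history1, history2):
    """history1/history2: lists of (linkrev, file_id) pairs, newest first,
    standing in for a filectx and its ancestors() iterator."""
    g1, g2 = iter(history1), iter(history2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)  # side 1 is newer, step it back
            elif f2[0] > f1[0]:
                f2 = next(g2)  # side 2 is newer, step it back
            else:  # same linkrev: related iff they are the same file
                return f1[1] == f2[1]
    except StopIteration:
        return False  # one history was exhausted first

# Both histories reach file id 'A' at linkrev 1, so they are related:
assert related([(5, 'C'), (1, 'A')], [(3, 'B'), (1, 'A')])
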
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    new_copies = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, new_copies)
    for dst, src in pycompat.iteritems(new_copies):
        wctx[dst].markcopied(src)

@@ -1,316 +1,354 b''
use crate::utils::hg_path::HgPathBuf;
use crate::Revision;

use im_rc::ordmap::DiffItem;
use im_rc::ordmap::OrdMap;

use std::collections::HashMap;
use std::collections::HashSet;

pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;

#[derive(Clone, Debug, PartialEq)]
struct TimeStampedPathCopy {
    /// revision at which the copy information was added
    rev: Revision,
    /// the copy source (set to None in case of deletion of the associated
    /// key)
    path: Option<HgPathBuf>,
}

/// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
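
To illustrate what these two types encode, a small standalone sketch (u32 revisions, String paths, and BTreeMap standing in for Revision, HgPathBuf, and OrdMap; the names are simplified stand-ins, not the real types):

use std::collections::BTreeMap;

// Simplified stand-in for TimeStampedPathCopy.
#[derive(Clone, Debug, PartialEq)]
struct Stamped {
    rev: u32,             // revision that introduced this information
    path: Option<String>, // None acts as a deletion tombstone
}

fn main() {
    let mut copies: BTreeMap<String, Stamped> = BTreeMap::new();
    // "b" was copied from "a" in revision 2:
    copies.insert(
        "b".to_string(),
        Stamped { rev: 2, path: Some("a".to_string()) },
    );
    // "b" was then deleted in revision 5; the tombstone is recorded
    // explicitly so the deletion survives a later merge of two such maps:
    copies.insert("b".to_string(), Stamped { rev: 5, path: None });
    assert_eq!(copies["b"], Stamped { rev: 5, path: None });
}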

/// holds parent 1, parent 2 and the relevant file actions.
pub type RevInfo = (Revision, Revision, ChangedFiles);

/// represents the files affected by a changeset
///
/// This holds a subset of mercurial.metadata.ChangingFiles, as we do not need
/// all the data categories tracked by it.
pub struct ChangedFiles {
    removed: HashSet<HgPathBuf>,
    merged: HashSet<HgPathBuf>,
    salvaged: HashSet<HgPathBuf>,
    copied_from_p1: PathCopies,
    copied_from_p2: PathCopies,
}

impl ChangedFiles {
    pub fn new(
        removed: HashSet<HgPathBuf>,
        merged: HashSet<HgPathBuf>,
        salvaged: HashSet<HgPathBuf>,
        copied_from_p1: PathCopies,
        copied_from_p2: PathCopies,
    ) -> Self {
        ChangedFiles {
            removed,
            merged,
            salvaged,
            copied_from_p1,
            copied_from_p2,
        }
    }

    pub fn new_empty() -> Self {
        ChangedFiles {
            removed: HashSet::new(),
            merged: HashSet::new(),
            salvaged: HashSet::new(),
            copied_from_p1: PathCopies::new(),
            copied_from_p2: PathCopies::new(),
        }
    }
}

+/// A struct responsible for answering "is X an ancestor of Y" quickly
+///
+/// The structure delegates ancestor calls to a callback, and caches the
+/// results.
+#[derive(Debug)]
+struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
+    inner: &'a A,
+    pairs: HashMap<(Revision, Revision), bool>,
+}
+
+impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
+    fn new(func: &'a A) -> Self {
+        Self {
+            inner: func,
+            pairs: HashMap::default(),
+        }
+    }
+
+    /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
+    fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
+        if anc > desc {
+            false
+        } else if anc == desc {
+            true
+        } else {
+            if let Some(b) = self.pairs.get(&(anc, desc)) {
+                *b
+            } else {
+                let b = (self.inner)(anc, desc);
+                self.pairs.insert((anc, desc), b);
+                b
+            }
+        }
+    }
+}
+
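
A quick demonstration of the caching behaviour this adds: the underlying callback runs once per distinct (ancestor, descendant) pair, and repeat queries are served from the `pairs` map. A standalone sketch with a counting closure (simplified Oracle type, u32 revisions; the toy graph and the Cell counter are assumptions made only to observe the call count):

use std::cell::Cell;
use std::collections::HashMap;

// Simplified copy of AncestorOracle, with u32 standing in for Revision.
struct Oracle<'a, A: Fn(u32, u32) -> bool> {
    inner: &'a A,
    pairs: HashMap<(u32, u32), bool>,
}

impl<'a, A: Fn(u32, u32) -> bool> Oracle<'a, A> {
    fn new(func: &'a A) -> Self {
        Self { inner: func, pairs: HashMap::new() }
    }

    fn is_ancestor(&mut self, anc: u32, desc: u32) -> bool {
        if anc > desc {
            false // an ancestor always has a smaller revision number
        } else if anc == desc {
            true
        } else if let Some(&cached) = self.pairs.get(&(anc, desc)) {
            cached // cache hit: the callback is not invoked again
        } else {
            let value = (self.inner)(anc, desc);
            self.pairs.insert((anc, desc), value);
            value
        }
    }
}

fn main() {
    let calls = Cell::new(0);
    // Stand-in for the expensive Python-side ancestry check:
    let is_ancestor = |anc: u32, desc: u32| {
        calls.set(calls.get() + 1);
        anc == 1 && desc == 4 // toy graph: 1 is the only ancestor of 4
    };
    let mut oracle = Oracle::new(&is_ancestor);
    assert!(oracle.is_ancestor(1, 4));
    assert!(oracle.is_ancestor(1, 4)); // served from the cache
    assert_eq!(calls.get(), 1);
}
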
/// Same as mercurial.copies._combine_changeset_copies, but in Rust.
///
/// Arguments are:
///
/// revs: all revisions to be considered
/// children: a {parent → [children]} mapping
/// target_rev: the final revision we are combining copies to
/// rev_info(rev): callback to get revision information:
///   * first parent
///   * second parent
///   * ChangedFiles
/// isancestors(low_rev, high_rev): callback to check if a revision is an
///                                 ancestor of another
-pub fn combine_changeset_copies(
+pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
    revs: Vec<Revision>,
    children: HashMap<Revision, Vec<Revision>>,
    target_rev: Revision,
    rev_info: &impl Fn(Revision) -> RevInfo,
-    is_ancestor: &impl Fn(Revision, Revision) -> bool,
+    is_ancestor: &A,
) -> PathCopies {
    let mut all_copies = HashMap::new();
+    let mut oracle = AncestorOracle::new(is_ancestor);

    for rev in revs {
        // Retrieve data computed in a previous iteration
        let copies = all_copies.remove(&rev);
        let copies = match copies {
            Some(c) => c,
            None => TimeStampedPathCopies::default(), // root of the walked set
        };

        let current_children = match children.get(&rev) {
            Some(c) => c,
            None => panic!("inconsistent `revs` and `children`"),
        };

        for child in current_children {
            // We will chain the copies information accumulated for `rev` with
            // the individual copies information for each of its children,
            // creating a new PathCopies for each `rev` → `children` vertex.
            let (p1, p2, changes) = rev_info(*child);

            let (parent, child_copies) = if rev == p1 {
                (1, &changes.copied_from_p1)
            } else {
                assert_eq!(rev, p2);
                (2, &changes.copied_from_p2)
            };
            let mut new_copies = copies.clone();

            for (dest, source) in child_copies {
                let entry;
                if let Some(v) = copies.get(source) {
                    entry = match &v.path {
                        Some(path) => Some((*(path)).to_owned()),
                        None => Some(source.to_owned()),
                    }
                } else {
                    entry = Some(source.to_owned());
                }
                // Each new entry is introduced by the child; we record this
                // information as we will need it to take the right decision
                // when merging conflicting copy information. See
                // merge_copies_dict for details.
                let ttpc = TimeStampedPathCopy {
                    rev: *child,
                    path: entry,
                };
                new_copies.insert(dest.to_owned(), ttpc);
            }

            // We must drop copy information for removed files.
            //
            // We need to explicitly record them as dropped to propagate this
            // information when merging two TimeStampedPathCopies objects.
            for f in changes.removed.iter() {
                if new_copies.contains_key(f.as_ref()) {
                    let ttpc = TimeStampedPathCopy {
                        rev: *child,
                        path: None,
                    };
                    new_copies.insert(f.to_owned(), ttpc);
                }
            }

            // A merge has two parents and needs to combine their copy
            // information.
            //
            // If the vertex from the other parent was already processed, we
            // will have a value for the child ready to be used. We need to
            // grab it and combine it with the one we already
            // computed. If not, we can simply store the newly
            // computed data. The processing happening at
            // the time of the second parent will take care of combining the
            // two TimeStampedPathCopies instances.
            match all_copies.remove(child) {
                None => {
                    all_copies.insert(child, new_copies);
                }
                Some(other_copies) => {
                    let (minor, major) = match parent {
                        1 => (other_copies, new_copies),
                        2 => (new_copies, other_copies),
                        _ => unreachable!(),
                    };
                    let merged_copies =
-                        merge_copies_dict(minor, major, &changes, is_ancestor);
+                        merge_copies_dict(minor, major, &changes, &mut oracle);
                    all_copies.insert(child, merged_copies);
                }
            };
        }
    }

    // Drop internal information (like the timestamp) and return the final
    // mapping.
    let tt_result = all_copies
        .remove(&target_rev)
        .expect("target revision was not processed");
    let mut result = PathCopies::default();
    for (dest, tt_source) in tt_result {
        if let Some(path) = tt_source.path {
            result.insert(dest, path);
        }
    }
    result
}

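The chaining step inside the loop above can be looked at in isolation: each child copy (dest, source) is resolved through the map inherited from the parent, so that 'a' copied to 'b' in one changeset followed by 'b' copied to 'c' in the next collapses to 'c' ultimately coming from 'a'. A minimal sketch of just that step (the chain helper is hypothetical; String paths, with the timestamps and deletion tombstones omitted):

use std::collections::HashMap;

/// Chain a child's {dest: source} copies onto the copies inherited from
/// its parent, mirroring the `child_copies` loop above (without the
/// timestamps and deletion tombstones).
fn chain(
    parent: &HashMap<String, String>,
    child: &[(&str, &str)],
) -> HashMap<String, String> {
    let mut new_copies = parent.clone();
    for (dest, source) in child {
        // If the source was itself a copy, keep pointing at the original.
        let entry = match parent.get(*source) {
            Some(original) => original.clone(),
            None => source.to_string(),
        };
        new_copies.insert(dest.to_string(), entry);
    }
    new_copies
}

fn main() {
    let mut parent = HashMap::new();
    parent.insert("b".to_string(), "a".to_string()); // rev N: a -> b
    let combined = chain(&parent, &[("c", "b")]); // rev N+1: b -> c
    assert_eq!(combined["c"], "a"); // chained: c ultimately comes from a
    assert_eq!(combined["b"], "a"); // b's own copy info is preserved
}
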
/// merge two copies-mappings together, minor and major
///
/// In case of conflict, the value from "major" will be picked, except in
/// some cases. See the inline documentation for details.
#[allow(clippy::if_same_then_else)]
-fn merge_copies_dict(
+fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
    minor: TimeStampedPathCopies,
    major: TimeStampedPathCopies,
    changes: &ChangedFiles,
-    is_ancestor: &impl Fn(Revision, Revision) -> bool,
+    oracle: &mut AncestorOracle<A>,
) -> TimeStampedPathCopies {
    if minor.is_empty() {
        return major;
    } else if major.is_empty() {
        return minor;
    }
    let mut override_minor = Vec::new();
    let mut override_major = Vec::new();

    let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
        override_major.push((k.clone(), v.clone()))
    };
    let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
        override_minor.push((k.clone(), v.clone()))
    };

    // The diff function leverages detection of identical subparts when minor
    // and major have some common ancestors. This makes it very fast in most
    // cases.
    //
    // In cases where the two maps are vastly different in size, the current
    // approach is still slowish because the iteration will iterate over
    // all the "exclusive" content of the larger one. This situation can be
    // frequent when the subgraph of revisions we are processing has a lot
    // of roots. Each root adds its own fully new map to the mix (and
    // likely a small map, if the path from the root to the "main path" is
    // small).
    //
    // We could do better by detecting such situations and processing them
    // differently.
    for d in minor.diff(&major) {
        match d {
            DiffItem::Add(k, v) => to_minor(k, v),
            DiffItem::Remove(k, v) => to_major(k, v),
            DiffItem::Update { old, new } => {
                let (dest, src_major) = new;
                let (_, src_minor) = old;
                let mut pick_minor = || (to_major(dest, src_minor));
                let mut pick_major = || (to_minor(dest, src_major));
                if src_major.path == src_minor.path {
                    // we have the same value, but from another source;
                    if src_major.rev == src_minor.rev {
                        // If the two entries are identical, there is no need
                        // to do anything (but diff should not have yielded
                        // them)
                        unreachable!();
-                    } else if is_ancestor(src_major.rev, src_minor.rev) {
+                    } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
+                    {
                        pick_minor();
                    } else {
                        pick_major();
                    }
                } else if src_major.rev == src_minor.rev {
                    // We cannot get copy information for both p1 and p2 in the
                    // same rev. So this is the same value.
                    unreachable!();
                } else if src_major.path.is_none()
                    && changes.salvaged.contains(dest)
                {
                    // If the file is "deleted" on the major side but was
                    // salvaged by the merge, we keep the minor side alive
                    pick_minor();
                } else if src_minor.path.is_none()
                    && changes.salvaged.contains(dest)
                {
                    // If the file is "deleted" on the minor side but was
                    // salvaged by the merge, unconditionally preserve the
                    // major side.
                    pick_major();
                } else if changes.merged.contains(dest) {
                    // If the file was actively merged, copy information from
                    // each side might conflict. The major side will win such
                    // a conflict.
                    pick_major();
-                } else if is_ancestor(src_major.rev, src_minor.rev) {
+                } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
                    // If the minor side is strictly newer than the major side,
                    // it should be kept.
                    pick_minor();
                } else if src_major.path.is_some() {
                    // Without any special case, the "major" value wins over
                    // the "minor" one.
                    pick_major();
-                } else if is_ancestor(src_minor.rev, src_major.rev) {
+                } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
                    // The "major" rev is a direct ancestor of "minor"; any
                    // different value should overwrite
                    pick_major();
                } else {
                    // The major version is None (so the file was deleted on
                    // that branch) and that branch is independent (neither
                    // minor nor major is an ancestor of the other one). We
                    // preserve the new information about the new file.
                    pick_minor();
                }
            }
        };
    }

    let updates;
    let mut result;
    if override_major.is_empty() {
        result = major
    } else if override_minor.is_empty() {
        result = minor
    } else {
        if override_minor.len() < override_major.len() {
            updates = override_minor;
            result = minor;
        } else {
            updates = override_major;
            result = major;
        }
        for (k, v) in updates {
            result.insert(k, v);
        }
    }
    result
}
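
The arbitration order in the Update branch above can be summarised as a standalone decision function. A simplified sketch (the arbitrate helper is hypothetical; each side is (rev, Option<path>), and the oracle queries and ChangedFiles lookups are passed in as precomputed booleans):

#[derive(Debug, PartialEq)]
enum Pick {
    Major,
    Minor,
}

/// Simplified arbitration between conflicting copy sources, following the
/// order of checks in merge_copies_dict. `major`/`minor` are
/// (rev, Option<source path>); the two `*_is_anc_of_*` flags stand in for
/// the oracle queries.
fn arbitrate(
    major: (u32, Option<&str>),
    minor: (u32, Option<&str>),
    dest_salvaged: bool,
    dest_merged: bool,
    major_is_anc_of_minor: bool,
    minor_is_anc_of_major: bool,
) -> Pick {
    if major.1 == minor.1 {
        // same value from both sides: keep the newer information
        if major_is_anc_of_minor { Pick::Minor } else { Pick::Major }
    } else if major.1.is_none() && dest_salvaged {
        Pick::Minor // deletion on the major side was reverted by the merge
    } else if minor.1.is_none() && dest_salvaged {
        Pick::Major // deletion on the minor side was reverted by the merge
    } else if dest_merged {
        Pick::Major // actively merged file: major wins the conflict
    } else if major_is_anc_of_minor {
        Pick::Minor // minor is strictly newer than major
    } else if major.1.is_some() {
        Pick::Major // default case: major wins
    } else if minor_is_anc_of_major {
        Pick::Major // major is a direct descendant of minor
    } else {
        Pick::Minor // independent deletion on major: keep the file alive
    }
}

fn main() {
    // The major side deleted the file, but the merge salvaged it:
    assert_eq!(
        arbitrate((5, None), (3, Some("a")), true, false, false, false),
        Pick::Minor
    );
}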