##// END OF EJS Templates
copies: split the combination of the copies mapping in its own function...
marmoute -
r44225:421ea577 default
parent child Browse files
Show More
@@ -1,1017 +1,1032 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    # Start from a copy of 'prefix'; each suffix entry either collapses a
    # two-step chain (dst <- mid <- src becomes dst <- src) or is kept as-is
    # when 'prefix' knows nothing about its source.
    chained = dict(prefix)
    for dst, mid in pycompat.iteritems(suffix):
        chained[dst] = prefix.get(mid, mid)
    return chained
72
72
73
73
74 def _tracefile(fctx, am, basemf):
74 def _tracefile(fctx, am, basemf):
75 """return file context that is the ancestor of fctx present in ancestor
75 """return file context that is the ancestor of fctx present in ancestor
76 manifest am
76 manifest am
77
77
78 Note: we used to try and stop after a given limit, however checking if that
78 Note: we used to try and stop after a given limit, however checking if that
79 limit is reached turned out to be very expensive. we are better off
79 limit is reached turned out to be very expensive. we are better off
80 disabling that feature."""
80 disabling that feature."""
81
81
82 for f in fctx.ancestors():
82 for f in fctx.ancestors():
83 path = f.path()
83 path = f.path()
84 if am.get(path, None) == f.filenode():
84 if am.get(path, None) == f.filenode():
85 return path
85 return path
86 if basemf and basemf.get(path, None) == f.filenode():
86 if basemf and basemf.get(path, None) == f.filenode():
87 return path
87 return path
88
88
89
89
90 def _dirstatecopies(repo, match=None):
90 def _dirstatecopies(repo, match=None):
91 ds = repo.dirstate
91 ds = repo.dirstate
92 c = ds.copies().copy()
92 c = ds.copies().copy()
93 for k in list(c):
93 for k in list(c):
94 if ds[k] not in b'anm' or (match and not match(k)):
94 if ds[k] not in b'anm' or (match and not match(k)):
95 del c[k]
95 del c[k]
96 return c
96 return c
97
97
98
98
99 def _computeforwardmissing(a, b, match=None):
99 def _computeforwardmissing(a, b, match=None):
100 """Computes which files are in b but not a.
100 """Computes which files are in b but not a.
101 This is its own function so extensions can easily wrap this call to see what
101 This is its own function so extensions can easily wrap this call to see what
102 files _forwardcopies is about to process.
102 files _forwardcopies is about to process.
103 """
103 """
104 ma = a.manifest()
104 ma = a.manifest()
105 mb = b.manifest()
105 mb = b.manifest()
106 return mb.filesnotin(ma, match=match)
106 return mb.filesnotin(ma, match=match)
107
107
108
108
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage for copies implies the changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honor the experimental read-from configuration
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
116
116
117
117
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    amf = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit). Note, we exclude merge commits from
    # this optimization, since the ctx.files() for a merge commit is not
    # correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    candidates = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(candidates))

    for dst in sorted(candidates):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % dst)
        fctx = b[dst]
        # reuse one ancestry context for all traced files; computing it per
        # file would be much more expensive
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        origin = _tracefile(fctx, amf, basemf)
        if origin:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % origin)
            copies[dst] = origin
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return copies
175
175
176
176
177 def _revinfogetter(repo):
177 def _revinfogetter(repo):
178 """return a function that return multiple data given a <rev>"i
178 """return a function that return multiple data given a <rev>"i
179
179
180 * p1: revision number of first parent
180 * p1: revision number of first parent
181 * p2: revision number of first parent
181 * p2: revision number of first parent
182 * p1copies: mapping of copies from p1
182 * p1copies: mapping of copies from p1
183 * p2copies: mapping of copies from p2
183 * p2copies: mapping of copies from p2
184 * removed: a list of removed files
184 * removed: a list of removed files
185 """
185 """
186 cl = repo.changelog
186 cl = repo.changelog
187 parents = cl.parentrevs
187 parents = cl.parentrevs
188
188
189 if repo.filecopiesmode == b'changeset-sidedata':
189 if repo.filecopiesmode == b'changeset-sidedata':
190 changelogrevision = cl.changelogrevision
190 changelogrevision = cl.changelogrevision
191 flags = cl.flags
191 flags = cl.flags
192
192
193 # A small cache to avoid doing the work twice for merges
193 # A small cache to avoid doing the work twice for merges
194 #
194 #
195 # In the vast majority of cases, if we ask information for a revision
195 # In the vast majority of cases, if we ask information for a revision
196 # about 1 parent, we'll later ask it for the other. So it make sense to
196 # about 1 parent, we'll later ask it for the other. So it make sense to
197 # keep the information around when reaching the first parent of a merge
197 # keep the information around when reaching the first parent of a merge
198 # and dropping it after it was provided for the second parents.
198 # and dropping it after it was provided for the second parents.
199 #
199 #
200 # It exists cases were only one parent of the merge will be walked. It
200 # It exists cases were only one parent of the merge will be walked. It
201 # happens when the "destination" the copy tracing is descendant from a
201 # happens when the "destination" the copy tracing is descendant from a
202 # new root, not common with the "source". In that case, we will only walk
202 # new root, not common with the "source". In that case, we will only walk
203 # through merge parents that are descendant of changesets common
203 # through merge parents that are descendant of changesets common
204 # between "source" and "destination".
204 # between "source" and "destination".
205 #
205 #
206 # With the current case implementation if such changesets have a copy
206 # With the current case implementation if such changesets have a copy
207 # information, we'll keep them in memory until the end of
207 # information, we'll keep them in memory until the end of
208 # _changesetforwardcopies. We don't expect the case to be frequent
208 # _changesetforwardcopies. We don't expect the case to be frequent
209 # enough to matters.
209 # enough to matters.
210 #
210 #
211 # In addition, it would be possible to reach pathological case, were
211 # In addition, it would be possible to reach pathological case, were
212 # many first parent are met before any second parent is reached. In
212 # many first parent are met before any second parent is reached. In
213 # that case the cache could grow. If this even become an issue one can
213 # that case the cache could grow. If this even become an issue one can
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 # time to save memory.
215 # time to save memory.
216 merge_caches = {}
216 merge_caches = {}
217
217
218 def revinfo(rev):
218 def revinfo(rev):
219 p1, p2 = parents(rev)
219 p1, p2 = parents(rev)
220 if flags(rev) & REVIDX_SIDEDATA:
220 if flags(rev) & REVIDX_SIDEDATA:
221 e = merge_caches.pop(rev, None)
221 e = merge_caches.pop(rev, None)
222 if e is not None:
222 if e is not None:
223 return e
223 return e
224 c = changelogrevision(rev)
224 c = changelogrevision(rev)
225 p1copies = c.p1copies
225 p1copies = c.p1copies
226 p2copies = c.p2copies
226 p2copies = c.p2copies
227 removed = c.filesremoved
227 removed = c.filesremoved
228 if p1 != node.nullrev and p2 != node.nullrev:
228 if p1 != node.nullrev and p2 != node.nullrev:
229 # XXX some case we over cache, IGNORE
229 # XXX some case we over cache, IGNORE
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
230 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
231 else:
231 else:
232 p1copies = {}
232 p1copies = {}
233 p2copies = {}
233 p2copies = {}
234 removed = []
234 removed = []
235 return p1, p2, p1copies, p2copies, removed
235 return p1, p2, p1copies, p2copies, removed
236
236
237 else:
237 else:
238
238
239 def revinfo(rev):
239 def revinfo(rev):
240 p1, p2 = parents(rev)
240 p1, p2 = parents(rev)
241 ctx = repo[rev]
241 ctx = repo[rev]
242 p1copies, p2copies = ctx._copies
242 p1copies, p2copies = ctx._copies
243 removed = ctx.filesremoved()
243 removed = ctx.filesremoved()
244 return p1, p2, p1copies, p2copies, removed
244 return p1, p2, p1copies, p2copies, removed
245
245
246 return revinfo
246 return revinfo
247
247
248
248
def _changesetforwardcopies(a, b, match):
    """changeset-centric copy tracing from ancestor 'a' down to 'b'"""
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    missingset = set(missingrevs)
    roots = set()
    for rev in missingrevs:
        for prev in cl.parentrevs(rev):
            if prev == node.nullrev:
                continue
            # record the child so copies can later be propagated downward
            children.setdefault(prev, []).append(rev)
            # a parent outside the missing set is a starting point
            if prev not in missingset:
                roots.add(prev)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # visit only revisions on a path from a root to b, plus the roots
    # themselves, excluding the destination
    iterrevs = set(from_head)
    iterrevs &= missingset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    return _combinechangesetcopies(
        sorted(iterrevs), children, b.rev(), revinfo, match
    )
286
287
def _combinechangesetcopies(revs, children, targetrev, revinfo, match):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        # pop: once every child of r has been processed, nobody needs this
        # entry anymore
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        # (the index from the original enumerate() was unused; iterate the
        # children directly)
        for c in children[r]:
            p1, p2, p1copies, p2copies, removed = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
                # _chain makes a copies, we can avoid doing so in some
                # simple/linear cases.
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    del newcopies[f]
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    othercopies.update(newcopies)
                else:
                    newcopies.update(othercopies)
                    all_copies[c] = newcopies
    return all_copies[targetrev]
340
355
341
356
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # b is the working copy: trace committed copies up to its first parent,
    # then layer the pending dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
356
371
357
372
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping where b is an ancestor of a"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    renames = {}
    for dst, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # drop pure copies: only keep entries whose source is gone in 'a'
        if src in a:
            continue
        renames[src] = dst
    return renames
378
393
379
394
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace copies forward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated: go back to the common ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if anc.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, anc, match=match),
            _forwardcopies(anc, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
414
429
415
430
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # an empty context, or comparing a context with itself, carries no copies
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # parent -> working dir: the dirstate already holds the answer
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
494
509
495
510
def _isfullcopytraceable(repo, c1, base):
    """Decide whether the full copytracing algorithm is cheap enough here.

    Returns True when both ``c1`` and ``base`` are mutable (non-public)
    and the number of changesets between them stays strictly below the
    ``experimental.copytrace.sourcecommitlimit`` config value, in which
    case full copytracing gives better results for an acceptable cost.
    """
    # a working-directory context has no revision; fall back to its parent
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved: assume the range is too expensive
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nbcommits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nbcommits < limit
514
529
515
530
516 def _checksinglesidecopies(
531 def _checksinglesidecopies(
517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
532 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
518 ):
533 ):
519 if src not in m2:
534 if src not in m2:
520 # deleted on side 2
535 # deleted on side 2
521 if src not in m1:
536 if src not in m1:
522 # renamed on side 1, deleted on side 2
537 # renamed on side 1, deleted on side 2
523 renamedelete[src] = dsts1
538 renamedelete[src] = dsts1
524 elif m2[src] != mb[src]:
539 elif m2[src] != mb[src]:
525 if not _related(c2[src], base[src]):
540 if not _related(c2[src], base[src]):
526 return
541 return
527 # modified on side 2
542 # modified on side 2
528 for dst in dsts1:
543 for dst in dsts1:
529 if dst not in m2:
544 if dst not in m2:
530 # dst not added on side 2 (handle as regular
545 # dst not added on side 2 (handle as regular
531 # "both created" case in manifestmerge otherwise)
546 # "both created" case in manifestmerge otherwise)
532 copy[dst] = src
547 copy[dst] = src
533
548
534
549
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    with the same meaning as the values returned by ``mergecopies``.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies recorded on each side since the merge base, as {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert the mappings so each source lists all its destinations; a
    # single source may have been copied to several destinations
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of destinations, only used for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # no copies at all: nothing more to compute
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
699
714
700
715
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as ``mergecopies``; the last four
    elements are always empty here.
    """

    # working-directory contexts have no revision; use their parents
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm as soon as a merge commit is encountered
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on side 1
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on side 1 by basename and by directory,
        # the two shapes of rename this heuristic can recognize
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
808
823
809
824
810 def _related(f1, f2):
825 def _related(f1, f2):
811 """return True if f1 and f2 filectx have a common ancestor
826 """return True if f1 and f2 filectx have a common ancestor
812
827
813 Walk back to common ancestor to see if the two files originate
828 Walk back to common ancestor to see if the two files originate
814 from the same file. Since workingfilectx's rev() is None it messes
829 from the same file. Since workingfilectx's rev() is None it messes
815 up the integer comparison logic, hence the pre-step check for
830 up the integer comparison logic, hence the pre-step check for
816 None (f1 and f2 can only be workingfilectx's initially).
831 None (f1 and f2 can only be workingfilectx's initially).
817 """
832 """
818
833
819 if f1 == f2:
834 if f1 == f2:
820 return True # a match
835 return True # a match
821
836
822 g1, g2 = f1.ancestors(), f2.ancestors()
837 g1, g2 = f1.ancestors(), f2.ancestors()
823 try:
838 try:
824 f1r, f2r = f1.linkrev(), f2.linkrev()
839 f1r, f2r = f1.linkrev(), f2.linkrev()
825
840
826 if f1r is None:
841 if f1r is None:
827 f1 = next(g1)
842 f1 = next(g1)
828 if f2r is None:
843 if f2r is None:
829 f2 = next(g2)
844 f2 = next(g2)
830
845
831 while True:
846 while True:
832 f1r, f2r = f1.linkrev(), f2.linkrev()
847 f1r, f2r = f1.linkrev(), f2.linkrev()
833 if f1r > f2r:
848 if f1r > f2r:
834 f1 = next(g1)
849 f1 = next(g1)
835 elif f2r > f1r:
850 elif f2r > f1r:
836 f2 = next(g2)
851 f2 = next(g2)
837 else: # f1 and f2 point to files in the same linkrev
852 else: # f1 and f2 point to files in the same linkrev
838 return f1 == f2 # true if they point to the same file
853 return f1 == f2 # true if they point to the same file
839 except StopIteration:
854 except StopIteration:
840 return False
855 return False
841
856
842
857
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    cfg = repo.ui.config(b'experimental', b'copytrace')
    parsed = stringutil.parsebool(cfg)
    # copytracing counts as enabled unless the value explicitly parses
    # as False; the special b'heuristics' value also enables it
    tracingenabled = cfg == b'heuristics' or parsed or parsed is None
    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function
        # because the line below is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required
        # for carrying copy metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst not in exclude and dst in wctx:
            wctx[dst].markcopied(src)
867
882
868
883
def computechangesetfilesadded(ctx):
    """Return the list of files added in changeset ``ctx``.

    A file counts as added when the changeset touches it and it is
    absent from every parent.
    """
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
877
892
878
893
def computechangesetfilesremoved(ctx):
    """Return the list of files removed in changeset ``ctx``.

    A file counts as removed when the changeset lists it as touched
    but it is no longer present in the changeset itself.
    """
    return [f for f in ctx.files() if f not in ctx]
887
902
888
903
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files no longer present
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if rename:
            src, srcnode = rename
            # attribute the copy to whichever parent actually holds the
            # source file at the recorded node
            if src in parent1 and parent1[src].filenode() == srcnode:
                p1copies[dst] = src
            elif src in parent2 and parent2[src].filenode() == srcnode:
                p2copies[dst] = src
    return p1copies, p2copies
913
928
914
929
def encodecopies(files, copies):
    """Serialize a ``{dest: source}`` copy mapping against ``files``.

    Each entry is encoded as ``<index>\\0<source>`` where ``<index>`` is
    the position of the destination inside ``files``; entries are joined
    with newlines.  Raises ``error.ProgrammingError`` when some copy
    destination does not appear in ``files``.
    """
    items = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    # every copy destination must appear in the file list, otherwise the
    # mapping could not be decoded back
    if len(items) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(items)
925
940
926
941
def decodecopies(files, data):
    """Parse ``data`` produced by ``encodecopies`` back into a dict.

    Returns ``{dest: source}``, or ``None`` when the payload does not
    match the expected encoding.
    """
    if not data:
        return {}
    try:
        copies = {}
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
942
957
943
958
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated decimal
    indices into ``files`` (in ``files`` order)."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in members
    )
951
966
952
967
def decodefileindices(files, data):
    """Decode a newline-separated index list into the matching entries
    of ``files``.

    Returns the selected file names, or ``None`` when the payload is not
    a valid index list for ``files`` (out-of-range index, non-numeric
    text, ...).
    """
    if not data:
        return []
    try:
        subset = []
        for chunk in data.split(b'\n'):
            idx = int(chunk)
            # reject anything outside the file list; negative indices
            # would otherwise silently wrap around in Python
            if not (0 <= idx < len(files)):
                return None
            subset.append(files[idx])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
968
983
969
984
def _getsidedata(srcrepo, rev):
    """Return the copy-related sidedata mapping for revision ``rev``.

    The mapping contains (only for the non-empty pieces) the encoded
    p1/p2 copy information and the encoded added/removed file indices,
    keyed by their ``sidedatamod.SD_*`` constants.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # The previous guard tested `any([filescopies, filesadded,
    # filesremoved])` where `filescopies` was the (p1copies, p2copies)
    # 2-tuple -- a non-empty tuple is always truthy, so the branch always
    # ran.  Test the actual payloads instead so changesets without any
    # copy data skip the sorting/encoding work entirely.
    if p1copies or p2copies or filesadded or filesremoved:
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
992
1007
993
1008
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callback that adds copy sidedata.

    The companion is invoked by the upgrade machinery for every revision
    of every revlog; copy sidedata is only computed for the changelog,
    recognized here by its ``filteredrevs`` attribute.
    """

    def sidedatacompanion(revlog, rev):
        # `filteredrevs` only exists on the changelog
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
1002
1017
1003
1018
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callback that strips copy sidedata.

    For changelog revisions carrying sidedata, the companion asks the
    upgrade machinery to drop every copy-related sidedata key.
    """
    # keys to strip from changelog revisions that carry sidedata
    toremove = (
        sidedatamod.SD_P1COPIES,
        sidedatamod.SD_P2COPIES,
        sidedatamod.SD_FILESADDED,
        sidedatamod.SD_FILESREMOVED,
    )

    def sidedatacompanion(revlog, rev):
        # `filteredrevs` only exists on the changelog
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                return False, toremove, {}
        return False, (), {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now