copies: correctly skip directories that have already been considered...
Kyle Lippincott
r39299:eebd5918 default
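
In _fullcopytracing()'s directory-move detection (the hunk below), the old code recorded directories in `invalid` and `dirmove` with a trailing slash (`dsrc + "/"`), while the guard at the top of the loop tested the bare name (`if dsrc in invalid`). That membership test could never succeed, so a directory that had already been ruled out was re-examined for every remaining file copy; the final `for i in invalid` cleanup still produced correct results, but the intended skip never fired. The fix keeps bare directory names throughout the loop and appends the slash once, in a single comprehension after the loop (the slash is still needed afterwards so that `dirmove` keys act as path prefixes). A minimal standalone sketch of the mismatch, in modern Python rather than the file's py2 idioms:

# Minimal sketch of the mismatch fixed below (not part of the commit).
# Keys were stored with a trailing slash but looked up without one,
# so the "already seen" check could never succeed.
invalid = set()
invalid.add("dir" + "/")        # old code stored "dir/"
print("dir" in invalid)         # False -- the skip branch never fired
print("dir/" in invalid)        # True -- what the old code actually stored

# The fix: track bare names while scanning, add the slash once at the end.
invalid = {"dir"}
print("dir" in invalid)         # True -- already-considered dirs are skipped
dirmove = {"src": "dst"}
dirmove = {k + "/": v + "/" for k, v in dirmove.items()}  # as in the new code
print(dirmove)                  # {'src/': 'dst/'}
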
@@ -1,883 +1,885 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    scmutil,
    util,
)

def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)

def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

def _tracefile(fctx, am, limit=-1):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
            return None

def _dirstatecopies(d, match=None):
    ds = d._repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)

def _backwardrenames(a, b):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        return _forwardcopies(x, y, match=match)
    if a == y:
        return _backwardrenames(x, y)
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        u1 = [f for f in u1 if narrowmatch(f)]
        u2 = [f for f in u2 if narrowmatch(f)]
    return u1, u2

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths"""
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder

def mergecopies(repo, c1, c2, base):
    """
    Calls different copytracing algorithms, based on config, to find moves
    and copies between contexts c1 and c2 that are relevant for merging.
    'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

    o        ---> 4 another commit
    |
    |   o   ---> 3 commit that modifies a.txt
    |  /
    o /   ---> 2 commit that moves a.txt to b.txt
    |/
    o        ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if yes, use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case.
    # We can't just compute _c1.ancestor(_c2) and compare it to ca, because
    # there can be multiple common ancestors, e.g. in case of bidmerge.
    # Because our caller may not know if the revision passed in lieu of the CA
    # is a genuine common ancestor or not without explicitly checking it, it's
    # better to determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug("  searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n"
                      % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
-       elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
+       elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
-           invalid.add(dsrc + "/")
+           invalid.add(dsrc)
        else:
            # looks good so far
-           dirmove[dsrc + "/"] = ddst + "/"
+           dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

+   dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
+
    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If a merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the
            # heuristics; config value to limit the number of candidate moves
            # to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames.
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there are both backward and forward renames (before and
    # after the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    if (skiprev is not None and
        repo.ui.config('experimental', 'copytrace') != 'off'):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
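
A side note on _chain(), which pathcopies() above uses to compose copy maps across a common ancestor: it only needs membership tests on `src`/`dst` and dict operations on `a`/`b`, so its behavior can be sanity-checked with plain sets and dicts standing in for contexts and copy maps. A minimal sketch (Python 3 `items()` in place of the file's py2 `iteritems()`):

def chain(src, dst, a, b):
    """Standalone port of _chain(): src/dst are sets of files present at
    the endpoints, a and b are {dst: src} copy maps to be composed."""
    t = a.copy()
    for k, v in b.items():
        if v in t:
            # found a chain
            if t[v] != k:
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            t[k] = v
    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]
    return t

# x..m renames a -> b, then m..y renames b -> c; chaining yields c -> a
print(chain({'a'}, {'c'}, {'b': 'a'}, {'c': 'b'}))   # {'c': 'a'}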
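
The trailing slashes restored by the new comprehension matter in the `movewithdir` pass of _fullcopytracing(): `dirmove` keys are matched against file paths with `startswith()`, and the slash keeps a directory such as `docs` from capturing files in a sibling like `docs2`. A small sketch of that rewrite step (hypothetical paths):

dirmove = {"docs/": "manual/"}            # shape produced by the new code
for f in ["docs/intro.txt", "docs2/new.txt"]:
    for d in dirmove:
        if f.startswith(d):
            # new file added in a directory that was moved, move it
            print(f, "->", dirmove[d] + f[len(d):])
            break
# only docs/intro.txt is rewritten (to manual/intro.txt); without the
# slash, the "docs" key would also match "docs2/new.txt"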
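
Relatedly, the candidate-gathering step of _heuristicscopytracing() pairs each missing file with the files added on the other side that share its basename or its dirname; each candidate is then vetted with _related() before being recorded as a copy. A sketch with hypothetical file names:

import collections
import os.path

basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
# files added on the local side (hypothetical names)
for f in ["lib/util.py", "lib2/helpers.py"]:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

missing = "lib/helpers.py"   # present in base and c2, gone from c1
candidates = (basenametofilename[os.path.basename(missing)] +
              dirnametofilename[os.path.dirname(missing)])
print(candidates)            # ['lib2/helpers.py', 'lib/util.py']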