##// END OF EJS Templates
copies: always respect matcher arg to _forwardcopies()...
Martin von Zweigbergk -
r35421:7ddc1e96 default
parent child Browse files
Show More
@@ -1,884 +1,884
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
24 def _findlimit(repo, a, b):
24 def _findlimit(repo, a, b):
25 """
25 """
26 Find the last revision that needs to be checked to ensure that a full
26 Find the last revision that needs to be checked to ensure that a full
27 transitive closure for file copies can be properly calculated.
27 transitive closure for file copies can be properly calculated.
28 Generally, this means finding the earliest revision number that's an
28 Generally, this means finding the earliest revision number that's an
29 ancestor of a or b but not both, except when a or b is a direct descendant
29 ancestor of a or b but not both, except when a or b is a direct descendant
30 of the other, in which case we can return the minimum revnum of a and b.
30 of the other, in which case we can return the minimum revnum of a and b.
31 None if no such revision exists.
31 None if no such revision exists.
32 """
32 """
33
33
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85
85
86 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # 1/ File 'a0' committed
87 # 1/ File 'a0' committed
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 3/ Move back to first commit
89 # 3/ Move back to first commit
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 #
92 #
93 # During the amend in step five, we will be in this state:
93 # During the amend in step five, we will be in this state:
94 #
94 #
95 # @ 3 temporary amend commit for a1-amend
95 # @ 3 temporary amend commit for a1-amend
96 # |
96 # |
97 # o 2 a1-amend
97 # o 2 a1-amend
98 # |
98 # |
99 # | o 1 a1
99 # | o 1 a1
100 # |/
100 # |/
101 # o 0 a0
101 # o 0 a0
102 #
102 #
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # yet the filelog has the copy information in rev 1 and we will not look
104 # yet the filelog has the copy information in rev 1 and we will not look
105 # back far enough unless we also look at the a and b as candidates.
105 # back far enough unless we also look at the a and b as candidates.
106 # This only occurs when a is a descendant of b or vice versa.
106 # This only occurs when a is a descendant of b or vice versa.
107 return min(limit, a, b)
107 return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 '''chain two sets of copies a->b'''
110 '''chain two sets of copies a->b'''
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in t.items():
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 '''return file context that is the ancestor of fctx present in ancestor
133 '''return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit'''
134 manifest am, stopping after the first ancestor lower than limit'''
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm':
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
159 def _forwardcopies(a, b, match=None):
159 def _forwardcopies(a, b, match=None):
160 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
160 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
161
161
162 # check for working copy
162 # check for working copy
163 w = None
163 w = None
164 if b.rev() is None:
164 if b.rev() is None:
165 w = b
165 w = b
166 b = w.p1()
166 b = w.p1()
167 if a == b:
167 if a == b:
168 # short-circuit to avoid issues with merge states
168 # short-circuit to avoid issues with merge states
169 return _dirstatecopies(w)
169 return _dirstatecopies(w, match)
170
170
171 # files might have to be traced back to the fctx parent of the last
171 # files might have to be traced back to the fctx parent of the last
172 # one-side-only changeset, but not further back than that
172 # one-side-only changeset, but not further back than that
173 limit = _findlimit(a._repo, a.rev(), b.rev())
173 limit = _findlimit(a._repo, a.rev(), b.rev())
174 if limit is None:
174 if limit is None:
175 limit = -1
175 limit = -1
176 am = a.manifest()
176 am = a.manifest()
177
177
178 # find where new files came from
178 # find where new files came from
179 # we currently don't try to find where old files went, too expensive
179 # we currently don't try to find where old files went, too expensive
180 # this means we can miss a case like 'hg rm b; hg cp a b'
180 # this means we can miss a case like 'hg rm b; hg cp a b'
181 cm = {}
181 cm = {}
182
182
183 # Computing the forward missing is quite expensive on large manifests, since
183 # Computing the forward missing is quite expensive on large manifests, since
184 # it compares the entire manifests. We can optimize it in the common use
184 # it compares the entire manifests. We can optimize it in the common use
185 # case of computing what copies are in a commit versus its parent (like
185 # case of computing what copies are in a commit versus its parent (like
186 # during a rebase or histedit). Note, we exclude merge commits from this
186 # during a rebase or histedit). Note, we exclude merge commits from this
187 # optimization, since the ctx.files() for a merge commit is not correct for
187 # optimization, since the ctx.files() for a merge commit is not correct for
188 # this comparison.
188 # this comparison.
189 forwardmissingmatch = match
189 forwardmissingmatch = match
190 if b.p1() == a and b.p2().node() == node.nullid:
190 if b.p1() == a and b.p2().node() == node.nullid:
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
194
194
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
196 for f in missing:
196 for f in missing:
197 fctx = b[f]
197 fctx = b[f]
198 fctx._ancestrycontext = ancestrycontext
198 fctx._ancestrycontext = ancestrycontext
199 ofctx = _tracefile(fctx, am, limit)
199 ofctx = _tracefile(fctx, am, limit)
200 if ofctx:
200 if ofctx:
201 cm[f] = ofctx.path()
201 cm[f] = ofctx.path()
202
202
203 # combine copies from dirstate if necessary
203 # combine copies from dirstate if necessary
204 if w is not None:
204 if w is not None:
205 cm = _chain(a, w, cm, _dirstatecopies(w))
205 cm = _chain(a, w, cm, _dirstatecopies(w, match))
206
206
207 return cm
207 return cm
208
208
209 def _backwardrenames(a, b):
209 def _backwardrenames(a, b):
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 return {}
211 return {}
212
212
213 # Even though we're not taking copies into account, 1:n rename situations
213 # Even though we're not taking copies into account, 1:n rename situations
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 # arbitrarily pick one of the renames.
215 # arbitrarily pick one of the renames.
216 f = _forwardcopies(b, a)
216 f = _forwardcopies(b, a)
217 r = {}
217 r = {}
218 for k, v in sorted(f.iteritems()):
218 for k, v in sorted(f.iteritems()):
219 # remove copies
219 # remove copies
220 if v in a:
220 if v in a:
221 continue
221 continue
222 r[v] = k
222 r[v] = k
223 return r
223 return r
224
224
225 def pathcopies(x, y, match=None):
225 def pathcopies(x, y, match=None):
226 '''find {dst@y: src@x} copy mapping for directed compare'''
226 '''find {dst@y: src@x} copy mapping for directed compare'''
227 if x == y or not x or not y:
227 if x == y or not x or not y:
228 return {}
228 return {}
229 a = y.ancestor(x)
229 a = y.ancestor(x)
230 if a == x:
230 if a == x:
231 return _forwardcopies(x, y, match=match)
231 return _forwardcopies(x, y, match=match)
232 if a == y:
232 if a == y:
233 return _backwardrenames(x, y)
233 return _backwardrenames(x, y)
234 return _chain(x, y, _backwardrenames(x, a),
234 return _chain(x, y, _backwardrenames(x, a),
235 _forwardcopies(a, y, match=match))
235 _forwardcopies(a, y, match=match))
236
236
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 and c2. This is its own function so extensions can easily wrap this call
239 and c2. This is its own function so extensions can easily wrap this call
240 to see what files mergecopies is about to process.
240 to see what files mergecopies is about to process.
241
241
242 Even though c1 and c2 are not used in this function, they are useful in
242 Even though c1 and c2 are not used in this function, they are useful in
243 other extensions for being able to read the file nodes of the changed files.
243 other extensions for being able to read the file nodes of the changed files.
244
244
245 "baselabel" can be passed to help distinguish the multiple computations
245 "baselabel" can be passed to help distinguish the multiple computations
246 done in the graft case.
246 done in the graft case.
247 """
247 """
248 u1 = sorted(addedinm1 - addedinm2)
248 u1 = sorted(addedinm1 - addedinm2)
249 u2 = sorted(addedinm2 - addedinm1)
249 u2 = sorted(addedinm2 - addedinm1)
250
250
251 header = " unmatched files in %s"
251 header = " unmatched files in %s"
252 if baselabel:
252 if baselabel:
253 header += ' (from %s)' % baselabel
253 header += ' (from %s)' % baselabel
254 if u1:
254 if u1:
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 if u2:
256 if u2:
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 return u1, u2
258 return u1, u2
259
259
260 def _makegetfctx(ctx):
260 def _makegetfctx(ctx):
261 """return a 'getfctx' function suitable for _checkcopies usage
261 """return a 'getfctx' function suitable for _checkcopies usage
262
262
263 We have to re-setup the function building 'filectx' for each
263 We have to re-setup the function building 'filectx' for each
264 '_checkcopies' to ensure the linkrev adjustment is properly setup for
264 '_checkcopies' to ensure the linkrev adjustment is properly setup for
265 each. Linkrev adjustment is important to avoid bug in rename
265 each. Linkrev adjustment is important to avoid bug in rename
266 detection. Moreover, having a proper '_ancestrycontext' setup ensures
266 detection. Moreover, having a proper '_ancestrycontext' setup ensures
267 the performance impact of this adjustment is kept limited. Without it,
267 the performance impact of this adjustment is kept limited. Without it,
268 each file could do a full dag traversal making the time complexity of
268 each file could do a full dag traversal making the time complexity of
269 the operation explode (see issue4537).
269 the operation explode (see issue4537).
270
270
271 This function exists here mostly to limit the impact on stable. Feel
271 This function exists here mostly to limit the impact on stable. Feel
272 free to refactor on default.
272 free to refactor on default.
273 """
273 """
274 rev = ctx.rev()
274 rev = ctx.rev()
275 repo = ctx._repo
275 repo = ctx._repo
276 ac = getattr(ctx, '_ancestrycontext', None)
276 ac = getattr(ctx, '_ancestrycontext', None)
277 if ac is None:
277 if ac is None:
278 revs = [rev]
278 revs = [rev]
279 if rev is None:
279 if rev is None:
280 revs = [p.rev() for p in ctx.parents()]
280 revs = [p.rev() for p in ctx.parents()]
281 ac = repo.changelog.ancestors(revs, inclusive=True)
281 ac = repo.changelog.ancestors(revs, inclusive=True)
282 ctx._ancestrycontext = ac
282 ctx._ancestrycontext = ac
283 def makectx(f, n):
283 def makectx(f, n):
284 if n in node.wdirnodes: # in a working context?
284 if n in node.wdirnodes: # in a working context?
285 if ctx.rev() is None:
285 if ctx.rev() is None:
286 return ctx.filectx(f)
286 return ctx.filectx(f)
287 return repo[None][f]
287 return repo[None][f]
288 fctx = repo.filectx(f, fileid=n)
288 fctx = repo.filectx(f, fileid=n)
289 # setup only needed for filectx not create from a changectx
289 # setup only needed for filectx not create from a changectx
290 fctx._ancestrycontext = ac
290 fctx._ancestrycontext = ac
291 fctx._descendantrev = rev
291 fctx._descendantrev = rev
292 return fctx
292 return fctx
293 return util.lrucachefunc(makectx)
293 return util.lrucachefunc(makectx)
294
294
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 """combine partial copy paths"""
296 """combine partial copy paths"""
297 remainder = {}
297 remainder = {}
298 for f in copyfrom:
298 for f in copyfrom:
299 if f in copyto:
299 if f in copyto:
300 finalcopy[copyto[f]] = copyfrom[f]
300 finalcopy[copyto[f]] = copyfrom[f]
301 del copyto[f]
301 del copyto[f]
302 for f in incompletediverge:
302 for f in incompletediverge:
303 assert f not in diverge
303 assert f not in diverge
304 ic = incompletediverge[f]
304 ic = incompletediverge[f]
305 if ic[0] in copyto:
305 if ic[0] in copyto:
306 diverge[f] = [copyto[ic[0]], ic[1]]
306 diverge[f] = [copyto[ic[0]], ic[1]]
307 else:
307 else:
308 remainder[f] = ic
308 remainder[f] = ic
309 return remainder
309 return remainder
310
310
311 def mergecopies(repo, c1, c2, base):
311 def mergecopies(repo, c1, c2, base):
312 """
312 """
313 The function calling different copytracing algorithms on the basis of config
313 The function calling different copytracing algorithms on the basis of config
314 which find moves and copies between context c1 and c2 that are relevant for
314 which find moves and copies between context c1 and c2 that are relevant for
315 merging. 'base' will be used as the merge base.
315 merging. 'base' will be used as the merge base.
316
316
317 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
317 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
318 files that were moved/ copied in one merge parent and modified in another.
318 files that were moved/ copied in one merge parent and modified in another.
319 For example:
319 For example:
320
320
321 o ---> 4 another commit
321 o ---> 4 another commit
322 |
322 |
323 | o ---> 3 commit that modifies a.txt
323 | o ---> 3 commit that modifies a.txt
324 | /
324 | /
325 o / ---> 2 commit that moves a.txt to b.txt
325 o / ---> 2 commit that moves a.txt to b.txt
326 |/
326 |/
327 o ---> 1 merge base
327 o ---> 1 merge base
328
328
329 If we try to rebase revision 3 on revision 4, since there is no a.txt in
329 If we try to rebase revision 3 on revision 4, since there is no a.txt in
330 revision 4, and if the user has copytrace disabled, we print the following
330 revision 4, and if the user has copytrace disabled, we print the following
331 message:
331 message:
332
332
333 ```other changed <file> which local deleted```
333 ```other changed <file> which local deleted```
334
334
335 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
335 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
336 "dirmove".
336 "dirmove".
337
337
338 "copy" is a mapping from destination name -> source name,
338 "copy" is a mapping from destination name -> source name,
339 where source is in c1 and destination is in c2 or vice-versa.
339 where source is in c1 and destination is in c2 or vice-versa.
340
340
341 "movewithdir" is a mapping from source name -> destination name,
341 "movewithdir" is a mapping from source name -> destination name,
342 where the file at source present in one context but not the other
342 where the file at source present in one context but not the other
343 needs to be moved to destination by the merge process, because the
343 needs to be moved to destination by the merge process, because the
344 other context moved the directory it is in.
344 other context moved the directory it is in.
345
345
346 "diverge" is a mapping of source name -> list of destination names
346 "diverge" is a mapping of source name -> list of destination names
347 for divergent renames.
347 for divergent renames.
348
348
349 "renamedelete" is a mapping of source name -> list of destination
349 "renamedelete" is a mapping of source name -> list of destination
350 names for files deleted in c1 that were renamed in c2 or vice-versa.
350 names for files deleted in c1 that were renamed in c2 or vice-versa.
351
351
352 "dirmove" is a mapping of detected source dir -> destination dir renames.
352 "dirmove" is a mapping of detected source dir -> destination dir renames.
353 This is needed for handling changes to new files previously grafted into
353 This is needed for handling changes to new files previously grafted into
354 renamed directories.
354 renamed directories.
355 """
355 """
356 # avoid silly behavior for update from empty dir
356 # avoid silly behavior for update from empty dir
357 if not c1 or not c2 or c1 == c2:
357 if not c1 or not c2 or c1 == c2:
358 return {}, {}, {}, {}, {}
358 return {}, {}, {}, {}, {}
359
359
360 # avoid silly behavior for parent -> working dir
360 # avoid silly behavior for parent -> working dir
361 if c2.node() is None and c1.node() == repo.dirstate.p1():
361 if c2.node() is None and c1.node() == repo.dirstate.p1():
362 return repo.dirstate.copies(), {}, {}, {}, {}
362 return repo.dirstate.copies(), {}, {}, {}, {}
363
363
364 copytracing = repo.ui.config('experimental', 'copytrace')
364 copytracing = repo.ui.config('experimental', 'copytrace')
365
365
366 # Copy trace disabling is explicitly below the node == p1 logic above
366 # Copy trace disabling is explicitly below the node == p1 logic above
367 # because the logic above is required for a simple copy to be kept across a
367 # because the logic above is required for a simple copy to be kept across a
368 # rebase.
368 # rebase.
369 if copytracing == 'off':
369 if copytracing == 'off':
370 return {}, {}, {}, {}, {}
370 return {}, {}, {}, {}, {}
371 elif copytracing == 'heuristics':
371 elif copytracing == 'heuristics':
372 # Do full copytracing if only non-public revisions are involved as
372 # Do full copytracing if only non-public revisions are involved as
373 # that will be fast enough and will also cover the copies which could
373 # that will be fast enough and will also cover the copies which could
374 # be missed by heuristics
374 # be missed by heuristics
375 if _isfullcopytraceable(repo, c1, base):
375 if _isfullcopytraceable(repo, c1, base):
376 return _fullcopytracing(repo, c1, c2, base)
376 return _fullcopytracing(repo, c1, c2, base)
377 return _heuristicscopytracing(repo, c1, c2, base)
377 return _heuristicscopytracing(repo, c1, c2, base)
378 else:
378 else:
379 return _fullcopytracing(repo, c1, c2, base)
379 return _fullcopytracing(repo, c1, c2, base)
380
380
381 def _isfullcopytraceable(repo, c1, base):
381 def _isfullcopytraceable(repo, c1, base):
382 """ Checks that if base, source and destination are all non-public branches,
382 """ Checks that if base, source and destination are all non-public branches,
383 if yes let's use the full copytrace algorithm for increased capabilities
383 if yes let's use the full copytrace algorithm for increased capabilities
384 since it will be fast enough.
384 since it will be fast enough.
385
385
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 number of changesets from c1 to base such that if number of changesets are
387 number of changesets from c1 to base such that if number of changesets are
388 more than the limit, full copytracing algorithm won't be used.
388 more than the limit, full copytracing algorithm won't be used.
389 """
389 """
390 if c1.rev() is None:
390 if c1.rev() is None:
391 c1 = c1.p1()
391 c1 = c1.p1()
392 if c1.mutable() and base.mutable():
392 if c1.mutable() and base.mutable():
393 sourcecommitlimit = repo.ui.configint('experimental',
393 sourcecommitlimit = repo.ui.configint('experimental',
394 'copytrace.sourcecommitlimit')
394 'copytrace.sourcecommitlimit')
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 return commits < sourcecommitlimit
396 return commits < sourcecommitlimit
397 return False
397 return False
398
398
399 def _fullcopytracing(repo, c1, c2, base):
399 def _fullcopytracing(repo, c1, c2, base):
400 """ The full copytracing algorithm which finds all the new files that were
400 """ The full copytracing algorithm which finds all the new files that were
401 added from merge base up to the top commit and for each file it checks if
401 added from merge base up to the top commit and for each file it checks if
402 this file was copied from another file.
402 this file was copied from another file.
403
403
404 This is pretty slow when a lot of changesets are involved but will track all
404 This is pretty slow when a lot of changesets are involved but will track all
405 the copies.
405 the copies.
406 """
406 """
407 # In certain scenarios (e.g. graft, update or rebase), base can be
407 # In certain scenarios (e.g. graft, update or rebase), base can be
408 # overridden. We still need to know a real common ancestor in this case. We
408 # overridden. We still need to know a real common ancestor in this case. We
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
411 # caller may not know if the revision passed in lieu of the CA is a genuine
411 # caller may not know if the revision passed in lieu of the CA is a genuine
412 # common ancestor or not without explicitly checking it, it's better to
412 # common ancestor or not without explicitly checking it, it's better to
413 # determine that here.
413 # determine that here.
414 #
414 #
415 # base.descendant(wc) and base.descendant(base) are False, work around that
415 # base.descendant(wc) and base.descendant(base) are False, work around that
416 _c1 = c1.p1() if c1.rev() is None else c1
416 _c1 = c1.p1() if c1.rev() is None else c1
417 _c2 = c2.p1() if c2.rev() is None else c2
417 _c2 = c2.p1() if c2.rev() is None else c2
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
420 # keep track of which endpoint is dirty
420 # keep track of which endpoint is dirty
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
423 graft = dirtyc1 or dirtyc2
423 graft = dirtyc1 or dirtyc2
424 tca = base
424 tca = base
425 if graft:
425 if graft:
426 tca = _c1.ancestor(_c2)
426 tca = _c1.ancestor(_c2)
427
427
428 limit = _findlimit(repo, c1.rev(), c2.rev())
428 limit = _findlimit(repo, c1.rev(), c2.rev())
429 if limit is None:
429 if limit is None:
430 # no common ancestor, no copies
430 # no common ancestor, no copies
431 return {}, {}, {}, {}, {}
431 return {}, {}, {}, {}, {}
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
433
433
434 m1 = c1.manifest()
434 m1 = c1.manifest()
435 m2 = c2.manifest()
435 m2 = c2.manifest()
436 mb = base.manifest()
436 mb = base.manifest()
437
437
438 # gather data from _checkcopies:
438 # gather data from _checkcopies:
439 # - diverge = record all diverges in this dict
439 # - diverge = record all diverges in this dict
440 # - copy = record all non-divergent copies in this dict
440 # - copy = record all non-divergent copies in this dict
441 # - fullcopy = record all copies in this dict
441 # - fullcopy = record all copies in this dict
442 # - incomplete = record non-divergent partial copies here
442 # - incomplete = record non-divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
444 diverge = {} # divergence data is shared
444 diverge = {} # divergence data is shared
445 incompletediverge = {}
445 incompletediverge = {}
446 data1 = {'copy': {},
446 data1 = {'copy': {},
447 'fullcopy': {},
447 'fullcopy': {},
448 'incomplete': {},
448 'incomplete': {},
449 'diverge': diverge,
449 'diverge': diverge,
450 'incompletediverge': incompletediverge,
450 'incompletediverge': incompletediverge,
451 }
451 }
452 data2 = {'copy': {},
452 data2 = {'copy': {},
453 'fullcopy': {},
453 'fullcopy': {},
454 'incomplete': {},
454 'incomplete': {},
455 'diverge': diverge,
455 'diverge': diverge,
456 'incompletediverge': incompletediverge,
456 'incompletediverge': incompletediverge,
457 }
457 }
458
458
459 # find interesting file sets from manifests
459 # find interesting file sets from manifests
460 addedinm1 = m1.filesnotin(mb)
460 addedinm1 = m1.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
462 bothnew = sorted(addedinm1 & addedinm2)
462 bothnew = sorted(addedinm1 & addedinm2)
463 if tca == base:
463 if tca == base:
464 # unmatched file from base
464 # unmatched file from base
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
466 u1u, u2u = u1r, u2r
466 u1u, u2u = u1r, u2r
467 else:
467 else:
468 # unmatched file from base (DAG rotation in the graft case)
468 # unmatched file from base (DAG rotation in the graft case)
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
470 baselabel='base')
470 baselabel='base')
471 # unmatched file from topological common ancestors (no DAG rotation)
471 # unmatched file from topological common ancestors (no DAG rotation)
472 # need to recompute this for directory move handling when grafting
472 # need to recompute this for directory move handling when grafting
473 mta = tca.manifest()
473 mta = tca.manifest()
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
475 m2.filesnotin(mta),
475 m2.filesnotin(mta),
476 baselabel='topological common ancestor')
476 baselabel='topological common ancestor')
477
477
478 for f in u1u:
478 for f in u1u:
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
480
480
481 for f in u2u:
481 for f in u2u:
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
483
483
484 copy = dict(data1['copy'])
484 copy = dict(data1['copy'])
485 copy.update(data2['copy'])
485 copy.update(data2['copy'])
486 fullcopy = dict(data1['fullcopy'])
486 fullcopy = dict(data1['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
488
488
489 if dirtyc1:
489 if dirtyc1:
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
491 incompletediverge)
491 incompletediverge)
492 else:
492 else:
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
494 incompletediverge)
494 incompletediverge)
495
495
496 renamedelete = {}
496 renamedelete = {}
497 renamedeleteset = set()
497 renamedeleteset = set()
498 divergeset = set()
498 divergeset = set()
499 for of, fl in list(diverge.items()):
499 for of, fl in list(diverge.items()):
500 if len(fl) == 1 or of in c1 or of in c2:
500 if len(fl) == 1 or of in c1 or of in c2:
501 del diverge[of] # not actually divergent, or not a rename
501 del diverge[of] # not actually divergent, or not a rename
502 if of not in c1 and of not in c2:
502 if of not in c1 and of not in c2:
503 # renamed on one side, deleted on the other side, but filter
503 # renamed on one side, deleted on the other side, but filter
504 # out files that have been renamed and then deleted
504 # out files that have been renamed and then deleted
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
506 renamedeleteset.update(fl) # reverse map for below
506 renamedeleteset.update(fl) # reverse map for below
507 else:
507 else:
508 divergeset.update(fl) # reverse map for below
508 divergeset.update(fl) # reverse map for below
509
509
510 if bothnew:
510 if bothnew:
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
512 % "\n ".join(bothnew))
512 % "\n ".join(bothnew))
513 bothdiverge = {}
513 bothdiverge = {}
514 bothincompletediverge = {}
514 bothincompletediverge = {}
515 remainder = {}
515 remainder = {}
516 both1 = {'copy': {},
516 both1 = {'copy': {},
517 'fullcopy': {},
517 'fullcopy': {},
518 'incomplete': {},
518 'incomplete': {},
519 'diverge': bothdiverge,
519 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
520 'incompletediverge': bothincompletediverge
521 }
521 }
522 both2 = {'copy': {},
522 both2 = {'copy': {},
523 'fullcopy': {},
523 'fullcopy': {},
524 'incomplete': {},
524 'incomplete': {},
525 'diverge': bothdiverge,
525 'diverge': bothdiverge,
526 'incompletediverge': bothincompletediverge
526 'incompletediverge': bothincompletediverge
527 }
527 }
528 for f in bothnew:
528 for f in bothnew:
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
531 if dirtyc1:
531 if dirtyc1:
532 # incomplete copies may only be found on the "dirty" side for bothnew
532 # incomplete copies may only be found on the "dirty" side for bothnew
533 assert not both2['incomplete']
533 assert not both2['incomplete']
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
535 bothincompletediverge)
535 bothincompletediverge)
536 elif dirtyc2:
536 elif dirtyc2:
537 assert not both1['incomplete']
537 assert not both1['incomplete']
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
539 bothincompletediverge)
539 bothincompletediverge)
540 else:
540 else:
541 # incomplete copies and divergences can't happen outside grafts
541 # incomplete copies and divergences can't happen outside grafts
542 assert not both1['incomplete']
542 assert not both1['incomplete']
543 assert not both2['incomplete']
543 assert not both2['incomplete']
544 assert not bothincompletediverge
544 assert not bothincompletediverge
545 for f in remainder:
545 for f in remainder:
546 assert f not in bothdiverge
546 assert f not in bothdiverge
547 ic = remainder[f]
547 ic = remainder[f]
548 if ic[0] in (m1 if dirtyc1 else m2):
548 if ic[0] in (m1 if dirtyc1 else m2):
549 # backed-out rename on one side, but watch out for deleted files
549 # backed-out rename on one side, but watch out for deleted files
550 bothdiverge[f] = ic
550 bothdiverge[f] = ic
551 for of, fl in bothdiverge.items():
551 for of, fl in bothdiverge.items():
552 if len(fl) == 2 and fl[0] == fl[1]:
552 if len(fl) == 2 and fl[0] == fl[1]:
553 copy[fl[0]] = of # not actually divergent, just matching renames
553 copy[fl[0]] = of # not actually divergent, just matching renames
554
554
555 if fullcopy and repo.ui.debugflag:
555 if fullcopy and repo.ui.debugflag:
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 "% = renamed and deleted):\n")
557 "% = renamed and deleted):\n")
558 for f in sorted(fullcopy):
558 for f in sorted(fullcopy):
559 note = ""
559 note = ""
560 if f in copy:
560 if f in copy:
561 note += "*"
561 note += "*"
562 if f in divergeset:
562 if f in divergeset:
563 note += "!"
563 note += "!"
564 if f in renamedeleteset:
564 if f in renamedeleteset:
565 note += "%"
565 note += "%"
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 note))
567 note))
568 del divergeset
568 del divergeset
569
569
570 if not fullcopy:
570 if not fullcopy:
571 return copy, {}, diverge, renamedelete, {}
571 return copy, {}, diverge, renamedelete, {}
572
572
573 repo.ui.debug(" checking for directory renames\n")
573 repo.ui.debug(" checking for directory renames\n")
574
574
575 # generate a directory move map
575 # generate a directory move map
576 d1, d2 = c1.dirs(), c2.dirs()
576 d1, d2 = c1.dirs(), c2.dirs()
577 # Hack for adding '', which is not otherwise added, to d1 and d2
577 # Hack for adding '', which is not otherwise added, to d1 and d2
578 d1.addpath('/')
578 d1.addpath('/')
579 d2.addpath('/')
579 d2.addpath('/')
580 invalid = set()
580 invalid = set()
581 dirmove = {}
581 dirmove = {}
582
582
583 # examine each file copy for a potential directory move, which is
583 # examine each file copy for a potential directory move, which is
584 # when all the files in a directory are moved to a new directory
584 # when all the files in a directory are moved to a new directory
585 for dst, src in fullcopy.iteritems():
585 for dst, src in fullcopy.iteritems():
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
587 if dsrc in invalid:
587 if dsrc in invalid:
588 # already seen to be uninteresting
588 # already seen to be uninteresting
589 continue
589 continue
590 elif dsrc in d1 and ddst in d1:
590 elif dsrc in d1 and ddst in d1:
591 # directory wasn't entirely moved locally
591 # directory wasn't entirely moved locally
592 invalid.add(dsrc + "/")
592 invalid.add(dsrc + "/")
593 elif dsrc in d2 and ddst in d2:
593 elif dsrc in d2 and ddst in d2:
594 # directory wasn't entirely moved remotely
594 # directory wasn't entirely moved remotely
595 invalid.add(dsrc + "/")
595 invalid.add(dsrc + "/")
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
597 # files from the same directory moved to two different places
597 # files from the same directory moved to two different places
598 invalid.add(dsrc + "/")
598 invalid.add(dsrc + "/")
599 else:
599 else:
600 # looks good so far
600 # looks good so far
601 dirmove[dsrc + "/"] = ddst + "/"
601 dirmove[dsrc + "/"] = ddst + "/"
602
602
603 for i in invalid:
603 for i in invalid:
604 if i in dirmove:
604 if i in dirmove:
605 del dirmove[i]
605 del dirmove[i]
606 del d1, d2, invalid
606 del d1, d2, invalid
607
607
608 if not dirmove:
608 if not dirmove:
609 return copy, {}, diverge, renamedelete, {}
609 return copy, {}, diverge, renamedelete, {}
610
610
611 for d in dirmove:
611 for d in dirmove:
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
613 (d, dirmove[d]))
613 (d, dirmove[d]))
614
614
615 movewithdir = {}
615 movewithdir = {}
616 # check unaccounted nonoverlapping files against directory moves
616 # check unaccounted nonoverlapping files against directory moves
617 for f in u1r + u2r:
617 for f in u1r + u2r:
618 if f not in fullcopy:
618 if f not in fullcopy:
619 for d in dirmove:
619 for d in dirmove:
620 if f.startswith(d):
620 if f.startswith(d):
621 # new file added in a directory that was moved, move it
621 # new file added in a directory that was moved, move it
622 df = dirmove[d] + f[len(d):]
622 df = dirmove[d] + f[len(d):]
623 if df not in copy:
623 if df not in copy:
624 movewithdir[f] = df
624 movewithdir[f] = df
625 repo.ui.debug((" pending file src: '%s' -> "
625 repo.ui.debug((" pending file src: '%s' -> "
626 "dst: '%s'\n") % (f, df))
626 "dst: '%s'\n") % (f, df))
627 break
627 break
628
628
629 return copy, movewithdir, diverge, renamedelete, dirmove
629 return copy, movewithdir, diverge, renamedelete, dirmove
630
630
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as mergecopies: (copies, {}, {}, {}, {}) --
    only plain copies are detected, never directory moves or divergences.
    """

    # working contexts have no usable rev; operate on their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched on the linear base..c2 "source branch"
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # Use a list comprehension instead of filter(): on Python 3 filter()
    # returns a lazy single-use iterator, which would make the truthiness
    # test below always succeed and be exhausted after one iteration.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname so
        # rename candidates can be found with two dict lookups per file
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
734
734
735 def _related(f1, f2, limit):
735 def _related(f1, f2, limit):
736 """return True if f1 and f2 filectx have a common ancestor
736 """return True if f1 and f2 filectx have a common ancestor
737
737
738 Walk back to common ancestor to see if the two files originate
738 Walk back to common ancestor to see if the two files originate
739 from the same file. Since workingfilectx's rev() is None it messes
739 from the same file. Since workingfilectx's rev() is None it messes
740 up the integer comparison logic, hence the pre-step check for
740 up the integer comparison logic, hence the pre-step check for
741 None (f1 and f2 can only be workingfilectx's initially).
741 None (f1 and f2 can only be workingfilectx's initially).
742 """
742 """
743
743
744 if f1 == f2:
744 if f1 == f2:
745 return f1 # a match
745 return f1 # a match
746
746
747 g1, g2 = f1.ancestors(), f2.ancestors()
747 g1, g2 = f1.ancestors(), f2.ancestors()
748 try:
748 try:
749 f1r, f2r = f1.linkrev(), f2.linkrev()
749 f1r, f2r = f1.linkrev(), f2.linkrev()
750
750
751 if f1r is None:
751 if f1r is None:
752 f1 = next(g1)
752 f1 = next(g1)
753 if f2r is None:
753 if f2r is None:
754 f2 = next(g2)
754 f2 = next(g2)
755
755
756 while True:
756 while True:
757 f1r, f2r = f1.linkrev(), f2.linkrev()
757 f1r, f2r = f1.linkrev(), f2.linkrev()
758 if f1r > f2r:
758 if f1r > f2r:
759 f1 = next(g1)
759 f1 = next(g1)
760 elif f2r > f1r:
760 elif f2r > f1r:
761 f2 = next(g2)
761 f2 = next(g2)
762 elif f1 == f2:
762 elif f1 == f2:
763 return f1 # a match
763 return f1 # a match
764 elif f1r == f2r or f1r < limit or f2r < limit:
764 elif f1r == f2r or f1r < limit or f2r < limit:
765 return False # copy no longer relevant
765 return False # copy no longer relevant
766 except StopIteration:
766 except StopIteration:
767 return False
767 return False
768
768
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' records every path name f has carried along its ancestry, so
    # rename cycles are detected and each path is processed only once
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence against the first seen name that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # ancestry exhausted without an answer: if the walk reached a file
    # known to the topological CA, record it as a partial (incomplete)
    # copy or divergence for mergecopies to combine with the other side
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
862
862
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    exclude = {}
    if skiprev is not None:
        # Building the exclusion set is O(size of the repo) during a
        # rebase, so it honors copytrace='off'; the rest of the function
        # stays active because it is much faster and is required for
        # carrying copy metadata across the rebase anyway.
        if repo.ui.config('experimental', 'copytrace') != 'off':
            exclude = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in allcopies.iteritems():
        if dst not in exclude:
            # pathcopies returns backward renames, so dst might not
            # actually be in the dirstate
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now