##// END OF EJS Templates
py3: avoid changing dictionary during iteration...
Gregory Szorc -
r36134:c0277161 default
parent child Browse files
Show More
@@ -1,883 +1,883 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    :repo: the repository to walk
    :a: revision number (or None for the working directory)
    :b: revision number (or None for the working directory)
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[r] is -1 for the a side, 1 for the b side, 0 once r is known to
    # be an ancestor of both
    side = {a: -1, b: 1}
    # revs are pushed negated so the heap pops the highest rev first,
    # giving a reverse topological walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate, not
            # the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent; nothing to walk
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 """chain two sets of copies a->b"""
110 """chain two sets of copies a->b"""
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in list(t.items()):
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 """return file context that is the ancestor of fctx present in ancestor
133 """return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit"""
134 manifest am, stopping after the first ancestor lower than limit"""
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d, match=None):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm' or (match and not match(k)):
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    searchlimit = _findlimit(a._repo, a.rev(), b.rev())
    if searchlimit is None:
        searchlimit = -1
    am = a.manifest()

    # We only look for files that appeared on the b side; finding where old
    # files went would be too expensive, so a case like 'hg rm b; hg cp a b'
    # is missed.
    copies = {}

    # Comparing entire manifests is expensive on large repositories. In the
    # common case where b is a non-merge child of a (e.g. rebase/histedit),
    # only b.files() can contain copies, so narrow the matcher accordingly.
    # Merge commits are excluded because ctx.files() is not correct for them.
    narrowed = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        narrowed = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=narrowed)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for why this matters)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, searchlimit)
        if ofctx:
            copies[f] = ofctx.path()
    return copies
193
193
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)

    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)

    # trace copies up to b's parent, then layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
207
207
def _backwardrenames(a, b):
    """find {src@b: dst@a} rename mapping where a is an ancestor of b"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames; sorting makes the pick stable.
    forward = _forwardcopies(b, a)
    renames = {}
    for dest, source in sorted(forward.iteritems()):
        if source in a:
            # the source still exists in a, so this was a copy, not a rename
            continue
        renames[source] = dest
    return renames
223
223
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    base = y.ancestor(x)
    if base == x:
        # x is an ancestor of y: a pure forward walk suffices
        return _forwardcopies(x, y, match=match)
    if base == y:
        # y is an ancestor of x: a pure backward walk suffices
        return _backwardrenames(x, y)
    # unrelated endpoints: walk back to the common ancestor, then forward
    backward = _backwardrenames(x, base)
    forward = _forwardcopies(base, y, match=match)
    return _chain(x, y, backward, forward)
235
235
236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 and c2. This is its own function so extensions can easily wrap this call
238 and c2. This is its own function so extensions can easily wrap this call
239 to see what files mergecopies is about to process.
239 to see what files mergecopies is about to process.
240
240
241 Even though c1 and c2 are not used in this function, they are useful in
241 Even though c1 and c2 are not used in this function, they are useful in
242 other extensions for being able to read the file nodes of the changed files.
242 other extensions for being able to read the file nodes of the changed files.
243
243
244 "baselabel" can be passed to help distinguish the multiple computations
244 "baselabel" can be passed to help distinguish the multiple computations
245 done in the graft case.
245 done in the graft case.
246 """
246 """
247 u1 = sorted(addedinm1 - addedinm2)
247 u1 = sorted(addedinm1 - addedinm2)
248 u2 = sorted(addedinm2 - addedinm1)
248 u2 = sorted(addedinm2 - addedinm1)
249
249
250 header = " unmatched files in %s"
250 header = " unmatched files in %s"
251 if baselabel:
251 if baselabel:
252 header += ' (from %s)' % baselabel
252 header += ' (from %s)' % baselabel
253 if u1:
253 if u1:
254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 if u2:
255 if u2:
256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 return u1, u2
257 return u1, u2
258
258
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse an ancestry context cached on ctx when present; otherwise build
    # one and cache it so later calls for the same ctx share it
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working directory: anchor the ancestry on its parents
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: _checkcopies may request the same (f, n) repeatedly
    return util.lrucachefunc(makectx)
293
293
294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 """combine partial copy paths"""
295 """combine partial copy paths"""
296 remainder = {}
296 remainder = {}
297 for f in copyfrom:
297 for f in copyfrom:
298 if f in copyto:
298 if f in copyto:
299 finalcopy[copyto[f]] = copyfrom[f]
299 finalcopy[copyto[f]] = copyfrom[f]
300 del copyto[f]
300 del copyto[f]
301 for f in incompletediverge:
301 for f in incompletediverge:
302 assert f not in diverge
302 assert f not in diverge
303 ic = incompletediverge[f]
303 ic = incompletediverge[f]
304 if ic[0] in copyto:
304 if ic[0] in copyto:
305 diverge[f] = [copyto[ic[0]], ic[1]]
305 diverge[f] = [copyto[ic[0]], ic[1]]
306 else:
306 else:
307 remainder[f] = ic
307 remainder[f] = ic
308 return remainder
308 return remainder
309
309
def mergecopies(repo, c1, c2, base):
    """Find moves and copies between contexts c1 and c2 that matter for
    merging, dispatching to the copytracing algorithm selected by the
    ``experimental.copytrace`` config. 'base' is used as the merge base.

    Copytracing lets commands like rebase, merge and unshelve merge files
    that were moved/copied in one merge parent and modified in the other.
    For example:

     o          ---> 4 another commit
     |
     |   o      ---> 3 commit that modifies a.txt
     |  /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

    Rebasing revision 3 onto revision 4 finds no a.txt in revision 4; with
    copytracing disabled the user instead sees:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" maps destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source is present in one context but not the other and must be moved to
    destination by the merge process because the other context moved the
    directory it lives in.

    "diverge" maps source name -> list of destination names for divergent
    renames.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames; needed
    to handle changes to new files previously grafted into renamed
    directories.
    """
    # degenerate endpoints (empty dir update, identical contexts): no copies
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # parent -> working directory: the dirstate already records the copies
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    mode = repo.ui.config('experimental', 'copytrace')

    # The 'off' check deliberately sits below the parent -> working dir
    # shortcut above: that shortcut is required for a simple copy to be
    # kept across a rebase even when copytracing is disabled.
    if mode == 'off':
        return {}, {}, {}, {}, {}
    if mode == 'heuristics':
        # Full copytracing is fast enough when only non-public revisions
        # are involved, and it also finds copies the heuristics would miss.
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
379
379
380 def _isfullcopytraceable(repo, c1, base):
380 def _isfullcopytraceable(repo, c1, base):
381 """ Checks that if base, source and destination are all no-public branches,
381 """ Checks that if base, source and destination are all no-public branches,
382 if yes let's use the full copytrace algorithm for increased capabilities
382 if yes let's use the full copytrace algorithm for increased capabilities
383 since it will be fast enough.
383 since it will be fast enough.
384
384
385 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
385 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
386 number of changesets from c1 to base such that if number of changesets are
386 number of changesets from c1 to base such that if number of changesets are
387 more than the limit, full copytracing algorithm won't be used.
387 more than the limit, full copytracing algorithm won't be used.
388 """
388 """
389 if c1.rev() is None:
389 if c1.rev() is None:
390 c1 = c1.p1()
390 c1 = c1.p1()
391 if c1.mutable() and base.mutable():
391 if c1.mutable() and base.mutable():
392 sourcecommitlimit = repo.ui.configint('experimental',
392 sourcecommitlimit = repo.ui.configint('experimental',
393 'copytrace.sourcecommitlimit')
393 'copytrace.sourcecommitlimit')
394 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
394 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
395 return commits < sourcecommitlimit
395 return commits < sourcecommitlimit
396 return False
396 return False
397
397
398 def _fullcopytracing(repo, c1, c2, base):
398 def _fullcopytracing(repo, c1, c2, base):
399 """ The full copytracing algorithm which finds all the new files that were
399 """ The full copytracing algorithm which finds all the new files that were
400 added from merge base up to the top commit and for each file it checks if
400 added from merge base up to the top commit and for each file it checks if
401 this file was copied from another file.
401 this file was copied from another file.
402
402
403 This is pretty slow when a lot of changesets are involved but will track all
403 This is pretty slow when a lot of changesets are involved but will track all
404 the copies.
404 the copies.
405 """
405 """
406 # In certain scenarios (e.g. graft, update or rebase), base can be
406 # In certain scenarios (e.g. graft, update or rebase), base can be
407 # overridden We still need to know a real common ancestor in this case We
407 # overridden We still need to know a real common ancestor in this case We
408 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
408 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
409 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
409 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
410 # caller may not know if the revision passed in lieu of the CA is a genuine
410 # caller may not know if the revision passed in lieu of the CA is a genuine
411 # common ancestor or not without explicitly checking it, it's better to
411 # common ancestor or not without explicitly checking it, it's better to
412 # determine that here.
412 # determine that here.
413 #
413 #
414 # base.descendant(wc) and base.descendant(base) are False, work around that
414 # base.descendant(wc) and base.descendant(base) are False, work around that
415 _c1 = c1.p1() if c1.rev() is None else c1
415 _c1 = c1.p1() if c1.rev() is None else c1
416 _c2 = c2.p1() if c2.rev() is None else c2
416 _c2 = c2.p1() if c2.rev() is None else c2
417 # an endpoint is "dirty" if it isn't a descendant of the merge base
417 # an endpoint is "dirty" if it isn't a descendant of the merge base
418 # if we have a dirty endpoint, we need to trigger graft logic, and also
418 # if we have a dirty endpoint, we need to trigger graft logic, and also
419 # keep track of which endpoint is dirty
419 # keep track of which endpoint is dirty
420 dirtyc1 = not (base == _c1 or base.descendant(_c1))
420 dirtyc1 = not (base == _c1 or base.descendant(_c1))
421 dirtyc2 = not (base == _c2 or base.descendant(_c2))
421 dirtyc2 = not (base == _c2 or base.descendant(_c2))
422 graft = dirtyc1 or dirtyc2
422 graft = dirtyc1 or dirtyc2
423 tca = base
423 tca = base
424 if graft:
424 if graft:
425 tca = _c1.ancestor(_c2)
425 tca = _c1.ancestor(_c2)
426
426
427 limit = _findlimit(repo, c1.rev(), c2.rev())
427 limit = _findlimit(repo, c1.rev(), c2.rev())
428 if limit is None:
428 if limit is None:
429 # no common ancestor, no copies
429 # no common ancestor, no copies
430 return {}, {}, {}, {}, {}
430 return {}, {}, {}, {}, {}
431 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
431 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
432
432
433 m1 = c1.manifest()
433 m1 = c1.manifest()
434 m2 = c2.manifest()
434 m2 = c2.manifest()
435 mb = base.manifest()
435 mb = base.manifest()
436
436
437 # gather data from _checkcopies:
437 # gather data from _checkcopies:
438 # - diverge = record all diverges in this dict
438 # - diverge = record all diverges in this dict
439 # - copy = record all non-divergent copies in this dict
439 # - copy = record all non-divergent copies in this dict
440 # - fullcopy = record all copies in this dict
440 # - fullcopy = record all copies in this dict
441 # - incomplete = record non-divergent partial copies here
441 # - incomplete = record non-divergent partial copies here
442 # - incompletediverge = record divergent partial copies here
442 # - incompletediverge = record divergent partial copies here
443 diverge = {} # divergence data is shared
443 diverge = {} # divergence data is shared
444 incompletediverge = {}
444 incompletediverge = {}
445 data1 = {'copy': {},
445 data1 = {'copy': {},
446 'fullcopy': {},
446 'fullcopy': {},
447 'incomplete': {},
447 'incomplete': {},
448 'diverge': diverge,
448 'diverge': diverge,
449 'incompletediverge': incompletediverge,
449 'incompletediverge': incompletediverge,
450 }
450 }
451 data2 = {'copy': {},
451 data2 = {'copy': {},
452 'fullcopy': {},
452 'fullcopy': {},
453 'incomplete': {},
453 'incomplete': {},
454 'diverge': diverge,
454 'diverge': diverge,
455 'incompletediverge': incompletediverge,
455 'incompletediverge': incompletediverge,
456 }
456 }
457
457
458 # find interesting file sets from manifests
458 # find interesting file sets from manifests
459 addedinm1 = m1.filesnotin(mb)
459 addedinm1 = m1.filesnotin(mb)
460 addedinm2 = m2.filesnotin(mb)
460 addedinm2 = m2.filesnotin(mb)
461 bothnew = sorted(addedinm1 & addedinm2)
461 bothnew = sorted(addedinm1 & addedinm2)
462 if tca == base:
462 if tca == base:
463 # unmatched file from base
463 # unmatched file from base
464 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
464 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
465 u1u, u2u = u1r, u2r
465 u1u, u2u = u1r, u2r
466 else:
466 else:
467 # unmatched file from base (DAG rotation in the graft case)
467 # unmatched file from base (DAG rotation in the graft case)
468 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
468 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
469 baselabel='base')
469 baselabel='base')
470 # unmatched file from topological common ancestors (no DAG rotation)
470 # unmatched file from topological common ancestors (no DAG rotation)
471 # need to recompute this for directory move handling when grafting
471 # need to recompute this for directory move handling when grafting
472 mta = tca.manifest()
472 mta = tca.manifest()
473 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
473 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
474 m2.filesnotin(mta),
474 m2.filesnotin(mta),
475 baselabel='topological common ancestor')
475 baselabel='topological common ancestor')
476
476
477 for f in u1u:
477 for f in u1u:
478 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
478 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
479
479
480 for f in u2u:
480 for f in u2u:
481 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
481 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
482
482
483 copy = dict(data1['copy'])
483 copy = dict(data1['copy'])
484 copy.update(data2['copy'])
484 copy.update(data2['copy'])
485 fullcopy = dict(data1['fullcopy'])
485 fullcopy = dict(data1['fullcopy'])
486 fullcopy.update(data2['fullcopy'])
486 fullcopy.update(data2['fullcopy'])
487
487
488 if dirtyc1:
488 if dirtyc1:
489 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
489 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
490 incompletediverge)
490 incompletediverge)
491 else:
491 else:
492 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
492 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
493 incompletediverge)
493 incompletediverge)
494
494
495 renamedelete = {}
495 renamedelete = {}
496 renamedeleteset = set()
496 renamedeleteset = set()
497 divergeset = set()
497 divergeset = set()
498 for of, fl in list(diverge.items()):
498 for of, fl in list(diverge.items()):
499 if len(fl) == 1 or of in c1 or of in c2:
499 if len(fl) == 1 or of in c1 or of in c2:
500 del diverge[of] # not actually divergent, or not a rename
500 del diverge[of] # not actually divergent, or not a rename
501 if of not in c1 and of not in c2:
501 if of not in c1 and of not in c2:
502 # renamed on one side, deleted on the other side, but filter
502 # renamed on one side, deleted on the other side, but filter
503 # out files that have been renamed and then deleted
503 # out files that have been renamed and then deleted
504 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
504 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
505 renamedeleteset.update(fl) # reverse map for below
505 renamedeleteset.update(fl) # reverse map for below
506 else:
506 else:
507 divergeset.update(fl) # reverse map for below
507 divergeset.update(fl) # reverse map for below
508
508
509 if bothnew:
509 if bothnew:
510 repo.ui.debug(" unmatched files new in both:\n %s\n"
510 repo.ui.debug(" unmatched files new in both:\n %s\n"
511 % "\n ".join(bothnew))
511 % "\n ".join(bothnew))
512 bothdiverge = {}
512 bothdiverge = {}
513 bothincompletediverge = {}
513 bothincompletediverge = {}
514 remainder = {}
514 remainder = {}
515 both1 = {'copy': {},
515 both1 = {'copy': {},
516 'fullcopy': {},
516 'fullcopy': {},
517 'incomplete': {},
517 'incomplete': {},
518 'diverge': bothdiverge,
518 'diverge': bothdiverge,
519 'incompletediverge': bothincompletediverge
519 'incompletediverge': bothincompletediverge
520 }
520 }
521 both2 = {'copy': {},
521 both2 = {'copy': {},
522 'fullcopy': {},
522 'fullcopy': {},
523 'incomplete': {},
523 'incomplete': {},
524 'diverge': bothdiverge,
524 'diverge': bothdiverge,
525 'incompletediverge': bothincompletediverge
525 'incompletediverge': bothincompletediverge
526 }
526 }
527 for f in bothnew:
527 for f in bothnew:
528 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
528 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
529 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
529 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
530 if dirtyc1:
530 if dirtyc1:
531 # incomplete copies may only be found on the "dirty" side for bothnew
531 # incomplete copies may only be found on the "dirty" side for bothnew
532 assert not both2['incomplete']
532 assert not both2['incomplete']
533 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
533 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
534 bothincompletediverge)
534 bothincompletediverge)
535 elif dirtyc2:
535 elif dirtyc2:
536 assert not both1['incomplete']
536 assert not both1['incomplete']
537 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
537 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
538 bothincompletediverge)
538 bothincompletediverge)
539 else:
539 else:
540 # incomplete copies and divergences can't happen outside grafts
540 # incomplete copies and divergences can't happen outside grafts
541 assert not both1['incomplete']
541 assert not both1['incomplete']
542 assert not both2['incomplete']
542 assert not both2['incomplete']
543 assert not bothincompletediverge
543 assert not bothincompletediverge
544 for f in remainder:
544 for f in remainder:
545 assert f not in bothdiverge
545 assert f not in bothdiverge
546 ic = remainder[f]
546 ic = remainder[f]
547 if ic[0] in (m1 if dirtyc1 else m2):
547 if ic[0] in (m1 if dirtyc1 else m2):
548 # backed-out rename on one side, but watch out for deleted files
548 # backed-out rename on one side, but watch out for deleted files
549 bothdiverge[f] = ic
549 bothdiverge[f] = ic
550 for of, fl in bothdiverge.items():
550 for of, fl in bothdiverge.items():
551 if len(fl) == 2 and fl[0] == fl[1]:
551 if len(fl) == 2 and fl[0] == fl[1]:
552 copy[fl[0]] = of # not actually divergent, just matching renames
552 copy[fl[0]] = of # not actually divergent, just matching renames
553
553
554 if fullcopy and repo.ui.debugflag:
554 if fullcopy and repo.ui.debugflag:
555 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
555 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
556 "% = renamed and deleted):\n")
556 "% = renamed and deleted):\n")
557 for f in sorted(fullcopy):
557 for f in sorted(fullcopy):
558 note = ""
558 note = ""
559 if f in copy:
559 if f in copy:
560 note += "*"
560 note += "*"
561 if f in divergeset:
561 if f in divergeset:
562 note += "!"
562 note += "!"
563 if f in renamedeleteset:
563 if f in renamedeleteset:
564 note += "%"
564 note += "%"
565 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
565 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
566 note))
566 note))
567 del divergeset
567 del divergeset
568
568
569 if not fullcopy:
569 if not fullcopy:
570 return copy, {}, diverge, renamedelete, {}
570 return copy, {}, diverge, renamedelete, {}
571
571
572 repo.ui.debug(" checking for directory renames\n")
572 repo.ui.debug(" checking for directory renames\n")
573
573
574 # generate a directory move map
574 # generate a directory move map
575 d1, d2 = c1.dirs(), c2.dirs()
575 d1, d2 = c1.dirs(), c2.dirs()
576 # Hack for adding '', which is not otherwise added, to d1 and d2
576 # Hack for adding '', which is not otherwise added, to d1 and d2
577 d1.addpath('/')
577 d1.addpath('/')
578 d2.addpath('/')
578 d2.addpath('/')
579 invalid = set()
579 invalid = set()
580 dirmove = {}
580 dirmove = {}
581
581
582 # examine each file copy for a potential directory move, which is
582 # examine each file copy for a potential directory move, which is
583 # when all the files in a directory are moved to a new directory
583 # when all the files in a directory are moved to a new directory
584 for dst, src in fullcopy.iteritems():
584 for dst, src in fullcopy.iteritems():
585 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
585 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
586 if dsrc in invalid:
586 if dsrc in invalid:
587 # already seen to be uninteresting
587 # already seen to be uninteresting
588 continue
588 continue
589 elif dsrc in d1 and ddst in d1:
589 elif dsrc in d1 and ddst in d1:
590 # directory wasn't entirely moved locally
590 # directory wasn't entirely moved locally
591 invalid.add(dsrc + "/")
591 invalid.add(dsrc + "/")
592 elif dsrc in d2 and ddst in d2:
592 elif dsrc in d2 and ddst in d2:
593 # directory wasn't entirely moved remotely
593 # directory wasn't entirely moved remotely
594 invalid.add(dsrc + "/")
594 invalid.add(dsrc + "/")
595 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
595 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
596 # files from the same directory moved to two different places
596 # files from the same directory moved to two different places
597 invalid.add(dsrc + "/")
597 invalid.add(dsrc + "/")
598 else:
598 else:
599 # looks good so far
599 # looks good so far
600 dirmove[dsrc + "/"] = ddst + "/"
600 dirmove[dsrc + "/"] = ddst + "/"
601
601
602 for i in invalid:
602 for i in invalid:
603 if i in dirmove:
603 if i in dirmove:
604 del dirmove[i]
604 del dirmove[i]
605 del d1, d2, invalid
605 del d1, d2, invalid
606
606
607 if not dirmove:
607 if not dirmove:
608 return copy, {}, diverge, renamedelete, {}
608 return copy, {}, diverge, renamedelete, {}
609
609
610 for d in dirmove:
610 for d in dirmove:
611 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
611 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
612 (d, dirmove[d]))
612 (d, dirmove[d]))
613
613
614 movewithdir = {}
614 movewithdir = {}
615 # check unaccounted nonoverlapping files against directory moves
615 # check unaccounted nonoverlapping files against directory moves
616 for f in u1r + u2r:
616 for f in u1r + u2r:
617 if f not in fullcopy:
617 if f not in fullcopy:
618 for d in dirmove:
618 for d in dirmove:
619 if f.startswith(d):
619 if f.startswith(d):
620 # new file added in a directory that was moved, move it
620 # new file added in a directory that was moved, move it
621 df = dirmove[d] + f[len(d):]
621 df = dirmove[d] + f[len(d):]
622 if df not in copy:
622 if df not in copy:
623 movewithdir[f] = df
623 movewithdir[f] = df
624 repo.ui.debug((" pending file src: '%s' -> "
624 repo.ui.debug((" pending file src: '%s' -> "
625 "dst: '%s'\n") % (f, df))
625 "dst: '%s'\n") % (f, df))
626 break
626 break
627
627
628 return copy, movewithdir, diverge, renamedelete, dirmove
628 return copy, movewithdir, diverge, renamedelete, dirmove
629
629
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # operate on committed revisions: working-directory contexts have
    # rev() of None, so step back to their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2, bailing out to the
    # full algorithm if a merge commit is encountered on the way
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the forward copies whose source still exists in c1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE: build a real list rather than using filter() directly: on
    # Python 3 filter() returns a lazy iterator which is always truthy,
    # so the emptiness check below would never skip the work.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added since base by basename and by directory,
        # so each missing file only inspects plausible rename targets
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidate moves to check;
        # the config read is loop-invariant, so fetch it once
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
733
733
734 def _related(f1, f2, limit):
734 def _related(f1, f2, limit):
735 """return True if f1 and f2 filectx have a common ancestor
735 """return True if f1 and f2 filectx have a common ancestor
736
736
737 Walk back to common ancestor to see if the two files originate
737 Walk back to common ancestor to see if the two files originate
738 from the same file. Since workingfilectx's rev() is None it messes
738 from the same file. Since workingfilectx's rev() is None it messes
739 up the integer comparison logic, hence the pre-step check for
739 up the integer comparison logic, hence the pre-step check for
740 None (f1 and f2 can only be workingfilectx's initially).
740 None (f1 and f2 can only be workingfilectx's initially).
741 """
741 """
742
742
743 if f1 == f2:
743 if f1 == f2:
744 return f1 # a match
744 return f1 # a match
745
745
746 g1, g2 = f1.ancestors(), f2.ancestors()
746 g1, g2 = f1.ancestors(), f2.ancestors()
747 try:
747 try:
748 f1r, f2r = f1.linkrev(), f2.linkrev()
748 f1r, f2r = f1.linkrev(), f2.linkrev()
749
749
750 if f1r is None:
750 if f1r is None:
751 f1 = next(g1)
751 f1 = next(g1)
752 if f2r is None:
752 if f2r is None:
753 f2 = next(g2)
753 f2 = next(g2)
754
754
755 while True:
755 while True:
756 f1r, f2r = f1.linkrev(), f2.linkrev()
756 f1r, f2r = f1.linkrev(), f2.linkrev()
757 if f1r > f2r:
757 if f1r > f2r:
758 f1 = next(g1)
758 f1 = next(g1)
759 elif f2r > f1r:
759 elif f2r > f1r:
760 f2 = next(g2)
760 f2 = next(g2)
761 elif f1 == f2:
761 elif f1 == f2:
762 return f1 # a match
762 return f1 # a match
763 elif f1r == f2r or f1r < limit or f2r < limit:
763 elif f1r == f2r or f1r < limit or f2r < limit:
764 return False # copy no longer relevant
764 return False # copy no longer relevant
765 except StopIteration:
765 except StopIteration:
766 return False
766 return False
767
767
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are not returned; they are accumulated into the 'copy',
    'fullcopy', 'incomplete', 'diverge' and 'incompletediverge'
    sub-dictionaries of ``data``.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    # walk f's filelog ancestry in the source; at each hop 'of' holds the
    # path the file had there ("original file"), and 'seen' the set of
    # paths already visited (renames can revisit a name)
    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
        else: # divergence w.r.t. graft CA on one side of topological CA
            # record the divergence against the first visited path that
            # exists in the merge base
            for sf in seen:
                if sf in mb:
                    assert sf not in data['diverge']
                    data['diverge'][sf] = [f, of]
                    break
        # a match (or divergence) was found in mdst; stop walking
        return

    # the walk ended without finding a counterpart in mdst; 'of' is the
    # path at the oldest ancestor visited. If that path exists in the
    # topological CA, record partial information for mergecopies to
    # combine with the walk from the other side.
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
861
861
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is given, copy records introduced between fromrev and
    skiprev are filtered out: such copies are not replayed into the
    dirstate even when they appear among the copies between fromrev
    and rev.
    """
    skipped = {}
    if skiprev is not None:
        if repo.ui.config('experimental', 'copytrace') != 'off':
            # copytrace='off' skips this line, but not the entire function
            # because the line below is O(size of the repo) during a rebase,
            # while the rest of the function is much faster (and is required
            # for carrying copy metadata across the rebase anyway).
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in skipped:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now