##// END OF EJS Templates
copies: extract method for getting non-wdir forward copies...
Martin von Zweigbergk -
r35423:8801cdce default
parent child Browse files
Show More
@@ -1,884 +1,889 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
24 def _findlimit(repo, a, b):
24 def _findlimit(repo, a, b):
25 """
25 """
26 Find the last revision that needs to be checked to ensure that a full
26 Find the last revision that needs to be checked to ensure that a full
27 transitive closure for file copies can be properly calculated.
27 transitive closure for file copies can be properly calculated.
28 Generally, this means finding the earliest revision number that's an
28 Generally, this means finding the earliest revision number that's an
29 ancestor of a or b but not both, except when a or b is a direct descendent
29 ancestor of a or b but not both, except when a or b is a direct descendent
30 of the other, in which case we can return the minimum revnum of a and b.
30 of the other, in which case we can return the minimum revnum of a and b.
31 None if no such revision exists.
31 None if no such revision exists.
32 """
32 """
33
33
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85
85
86 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # 1/ File 'a0' committed
87 # 1/ File 'a0' committed
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 3/ Move back to first commit
89 # 3/ Move back to first commit
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 #
92 #
93 # During the amend in step five, we will be in this state:
93 # During the amend in step five, we will be in this state:
94 #
94 #
95 # @ 3 temporary amend commit for a1-amend
95 # @ 3 temporary amend commit for a1-amend
96 # |
96 # |
97 # o 2 a1-amend
97 # o 2 a1-amend
98 # |
98 # |
99 # | o 1 a1
99 # | o 1 a1
100 # |/
100 # |/
101 # o 0 a0
101 # o 0 a0
102 #
102 #
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # yet the filelog has the copy information in rev 1 and we will not look
104 # yet the filelog has the copy information in rev 1 and we will not look
105 # back far enough unless we also look at the a and b as candidates.
105 # back far enough unless we also look at the a and b as candidates.
106 # This only occurs when a is a descendent of b or visa-versa.
106 # This only occurs when a is a descendent of b or visa-versa.
107 return min(limit, a, b)
107 return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 """chain two sets of copies a->b"""
110 """chain two sets of copies a->b"""
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in t.items():
126 for k, v in t.items():
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 """return file context that is the ancestor of fctx present in ancestor
133 """return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit"""
134 manifest am, stopping after the first ancestor lower than limit"""
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d, match=None):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm' or (match and not match(k)):
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        # share one ancestry context across all traced files to keep
        # linkrev adjustment cheap (see _makegetfctx for background)
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w, match)

    cm = _committedforwardcopies(a, b, match)

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w, match))

    return cm
209 def _backwardrenames(a, b):
214 def _backwardrenames(a, b):
210 if a._repo.ui.config('experimental', 'copytrace') == 'off':
215 if a._repo.ui.config('experimental', 'copytrace') == 'off':
211 return {}
216 return {}
212
217
213 # Even though we're not taking copies into account, 1:n rename situations
218 # Even though we're not taking copies into account, 1:n rename situations
214 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
219 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
215 # arbitrarily pick one of the renames.
220 # arbitrarily pick one of the renames.
216 f = _forwardcopies(b, a)
221 f = _forwardcopies(b, a)
217 r = {}
222 r = {}
218 for k, v in sorted(f.iteritems()):
223 for k, v in sorted(f.iteritems()):
219 # remove copies
224 # remove copies
220 if v in a:
225 if v in a:
221 continue
226 continue
222 r[v] = k
227 r[v] = k
223 return r
228 return r
224
229
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: pure forward direction
        return _forwardcopies(x, y, match=match)
    if a == y:
        # y is an ancestor of x: pure backward direction
        return _backwardrenames(x, y)
    # diverged: go back to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
237 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
242 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
238 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
243 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
239 and c2. This is its own function so extensions can easily wrap this call
244 and c2. This is its own function so extensions can easily wrap this call
240 to see what files mergecopies is about to process.
245 to see what files mergecopies is about to process.
241
246
242 Even though c1 and c2 are not used in this function, they are useful in
247 Even though c1 and c2 are not used in this function, they are useful in
243 other extensions for being able to read the file nodes of the changed files.
248 other extensions for being able to read the file nodes of the changed files.
244
249
245 "baselabel" can be passed to help distinguish the multiple computations
250 "baselabel" can be passed to help distinguish the multiple computations
246 done in the graft case.
251 done in the graft case.
247 """
252 """
248 u1 = sorted(addedinm1 - addedinm2)
253 u1 = sorted(addedinm1 - addedinm2)
249 u2 = sorted(addedinm2 - addedinm1)
254 u2 = sorted(addedinm2 - addedinm1)
250
255
251 header = " unmatched files in %s"
256 header = " unmatched files in %s"
252 if baselabel:
257 if baselabel:
253 header += ' (from %s)' % baselabel
258 header += ' (from %s)' % baselabel
254 if u1:
259 if u1:
255 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
260 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
256 if u2:
261 if u2:
257 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
262 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
258 return u1, u2
263 return u1, u2
259
264
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: use the parents' revs for ancestry
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
295 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
300 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
296 """combine partial copy paths"""
301 """combine partial copy paths"""
297 remainder = {}
302 remainder = {}
298 for f in copyfrom:
303 for f in copyfrom:
299 if f in copyto:
304 if f in copyto:
300 finalcopy[copyto[f]] = copyfrom[f]
305 finalcopy[copyto[f]] = copyfrom[f]
301 del copyto[f]
306 del copyto[f]
302 for f in incompletediverge:
307 for f in incompletediverge:
303 assert f not in diverge
308 assert f not in diverge
304 ic = incompletediverge[f]
309 ic = incompletediverge[f]
305 if ic[0] in copyto:
310 if ic[0] in copyto:
306 diverge[f] = [copyto[ic[0]], ic[1]]
311 diverge[f] = [copyto[ic[0]], ic[1]]
307 else:
312 else:
308 remainder[f] = ic
313 remainder[f] = ic
309 return remainder
314 return remainder
310
315
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
381 def _isfullcopytraceable(repo, c1, base):
386 def _isfullcopytraceable(repo, c1, base):
382 """ Checks that if base, source and destination are all no-public branches,
387 """ Checks that if base, source and destination are all no-public branches,
383 if yes let's use the full copytrace algorithm for increased capabilities
388 if yes let's use the full copytrace algorithm for increased capabilities
384 since it will be fast enough.
389 since it will be fast enough.
385
390
386 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
391 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
387 number of changesets from c1 to base such that if number of changesets are
392 number of changesets from c1 to base such that if number of changesets are
388 more than the limit, full copytracing algorithm won't be used.
393 more than the limit, full copytracing algorithm won't be used.
389 """
394 """
390 if c1.rev() is None:
395 if c1.rev() is None:
391 c1 = c1.p1()
396 c1 = c1.p1()
392 if c1.mutable() and base.mutable():
397 if c1.mutable() and base.mutable():
393 sourcecommitlimit = repo.ui.configint('experimental',
398 sourcecommitlimit = repo.ui.configint('experimental',
394 'copytrace.sourcecommitlimit')
399 'copytrace.sourcecommitlimit')
395 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
400 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
396 return commits < sourcecommitlimit
401 return commits < sourcecommitlimit
397 return False
402 return False
398
403
399 def _fullcopytracing(repo, c1, c2, base):
404 def _fullcopytracing(repo, c1, c2, base):
400 """ The full copytracing algorithm which finds all the new files that were
405 """ The full copytracing algorithm which finds all the new files that were
401 added from merge base up to the top commit and for each file it checks if
406 added from merge base up to the top commit and for each file it checks if
402 this file was copied from another file.
407 this file was copied from another file.
403
408
404 This is pretty slow when a lot of changesets are involved but will track all
409 This is pretty slow when a lot of changesets are involved but will track all
405 the copies.
410 the copies.
406 """
411 """
407 # In certain scenarios (e.g. graft, update or rebase), base can be
412 # In certain scenarios (e.g. graft, update or rebase), base can be
408 # overridden We still need to know a real common ancestor in this case We
413 # overridden We still need to know a real common ancestor in this case We
409 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
414 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
410 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
415 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
411 # caller may not know if the revision passed in lieu of the CA is a genuine
416 # caller may not know if the revision passed in lieu of the CA is a genuine
412 # common ancestor or not without explicitly checking it, it's better to
417 # common ancestor or not without explicitly checking it, it's better to
413 # determine that here.
418 # determine that here.
414 #
419 #
415 # base.descendant(wc) and base.descendant(base) are False, work around that
420 # base.descendant(wc) and base.descendant(base) are False, work around that
416 _c1 = c1.p1() if c1.rev() is None else c1
421 _c1 = c1.p1() if c1.rev() is None else c1
417 _c2 = c2.p1() if c2.rev() is None else c2
422 _c2 = c2.p1() if c2.rev() is None else c2
418 # an endpoint is "dirty" if it isn't a descendant of the merge base
423 # an endpoint is "dirty" if it isn't a descendant of the merge base
419 # if we have a dirty endpoint, we need to trigger graft logic, and also
424 # if we have a dirty endpoint, we need to trigger graft logic, and also
420 # keep track of which endpoint is dirty
425 # keep track of which endpoint is dirty
421 dirtyc1 = not (base == _c1 or base.descendant(_c1))
426 dirtyc1 = not (base == _c1 or base.descendant(_c1))
422 dirtyc2 = not (base == _c2 or base.descendant(_c2))
427 dirtyc2 = not (base == _c2 or base.descendant(_c2))
423 graft = dirtyc1 or dirtyc2
428 graft = dirtyc1 or dirtyc2
424 tca = base
429 tca = base
425 if graft:
430 if graft:
426 tca = _c1.ancestor(_c2)
431 tca = _c1.ancestor(_c2)
427
432
428 limit = _findlimit(repo, c1.rev(), c2.rev())
433 limit = _findlimit(repo, c1.rev(), c2.rev())
429 if limit is None:
434 if limit is None:
430 # no common ancestor, no copies
435 # no common ancestor, no copies
431 return {}, {}, {}, {}, {}
436 return {}, {}, {}, {}, {}
432 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
437 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
433
438
434 m1 = c1.manifest()
439 m1 = c1.manifest()
435 m2 = c2.manifest()
440 m2 = c2.manifest()
436 mb = base.manifest()
441 mb = base.manifest()
437
442
438 # gather data from _checkcopies:
443 # gather data from _checkcopies:
439 # - diverge = record all diverges in this dict
444 # - diverge = record all diverges in this dict
440 # - copy = record all non-divergent copies in this dict
445 # - copy = record all non-divergent copies in this dict
441 # - fullcopy = record all copies in this dict
446 # - fullcopy = record all copies in this dict
442 # - incomplete = record non-divergent partial copies here
447 # - incomplete = record non-divergent partial copies here
443 # - incompletediverge = record divergent partial copies here
448 # - incompletediverge = record divergent partial copies here
444 diverge = {} # divergence data is shared
449 diverge = {} # divergence data is shared
445 incompletediverge = {}
450 incompletediverge = {}
446 data1 = {'copy': {},
451 data1 = {'copy': {},
447 'fullcopy': {},
452 'fullcopy': {},
448 'incomplete': {},
453 'incomplete': {},
449 'diverge': diverge,
454 'diverge': diverge,
450 'incompletediverge': incompletediverge,
455 'incompletediverge': incompletediverge,
451 }
456 }
452 data2 = {'copy': {},
457 data2 = {'copy': {},
453 'fullcopy': {},
458 'fullcopy': {},
454 'incomplete': {},
459 'incomplete': {},
455 'diverge': diverge,
460 'diverge': diverge,
456 'incompletediverge': incompletediverge,
461 'incompletediverge': incompletediverge,
457 }
462 }
458
463
459 # find interesting file sets from manifests
464 # find interesting file sets from manifests
460 addedinm1 = m1.filesnotin(mb)
465 addedinm1 = m1.filesnotin(mb)
461 addedinm2 = m2.filesnotin(mb)
466 addedinm2 = m2.filesnotin(mb)
462 bothnew = sorted(addedinm1 & addedinm2)
467 bothnew = sorted(addedinm1 & addedinm2)
463 if tca == base:
468 if tca == base:
464 # unmatched file from base
469 # unmatched file from base
465 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
470 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
466 u1u, u2u = u1r, u2r
471 u1u, u2u = u1r, u2r
467 else:
472 else:
468 # unmatched file from base (DAG rotation in the graft case)
473 # unmatched file from base (DAG rotation in the graft case)
469 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
474 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
470 baselabel='base')
475 baselabel='base')
471 # unmatched file from topological common ancestors (no DAG rotation)
476 # unmatched file from topological common ancestors (no DAG rotation)
472 # need to recompute this for directory move handling when grafting
477 # need to recompute this for directory move handling when grafting
473 mta = tca.manifest()
478 mta = tca.manifest()
474 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
479 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
475 m2.filesnotin(mta),
480 m2.filesnotin(mta),
476 baselabel='topological common ancestor')
481 baselabel='topological common ancestor')
477
482
478 for f in u1u:
483 for f in u1u:
479 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
484 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
480
485
481 for f in u2u:
486 for f in u2u:
482 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
487 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
483
488
484 copy = dict(data1['copy'])
489 copy = dict(data1['copy'])
485 copy.update(data2['copy'])
490 copy.update(data2['copy'])
486 fullcopy = dict(data1['fullcopy'])
491 fullcopy = dict(data1['fullcopy'])
487 fullcopy.update(data2['fullcopy'])
492 fullcopy.update(data2['fullcopy'])
488
493
489 if dirtyc1:
494 if dirtyc1:
490 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
495 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
491 incompletediverge)
496 incompletediverge)
492 else:
497 else:
493 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
498 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
494 incompletediverge)
499 incompletediverge)
495
500
496 renamedelete = {}
501 renamedelete = {}
497 renamedeleteset = set()
502 renamedeleteset = set()
498 divergeset = set()
503 divergeset = set()
499 for of, fl in list(diverge.items()):
504 for of, fl in list(diverge.items()):
500 if len(fl) == 1 or of in c1 or of in c2:
505 if len(fl) == 1 or of in c1 or of in c2:
501 del diverge[of] # not actually divergent, or not a rename
506 del diverge[of] # not actually divergent, or not a rename
502 if of not in c1 and of not in c2:
507 if of not in c1 and of not in c2:
503 # renamed on one side, deleted on the other side, but filter
508 # renamed on one side, deleted on the other side, but filter
504 # out files that have been renamed and then deleted
509 # out files that have been renamed and then deleted
505 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
510 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
506 renamedeleteset.update(fl) # reverse map for below
511 renamedeleteset.update(fl) # reverse map for below
507 else:
512 else:
508 divergeset.update(fl) # reverse map for below
513 divergeset.update(fl) # reverse map for below
509
514
510 if bothnew:
515 if bothnew:
511 repo.ui.debug(" unmatched files new in both:\n %s\n"
516 repo.ui.debug(" unmatched files new in both:\n %s\n"
512 % "\n ".join(bothnew))
517 % "\n ".join(bothnew))
513 bothdiverge = {}
518 bothdiverge = {}
514 bothincompletediverge = {}
519 bothincompletediverge = {}
515 remainder = {}
520 remainder = {}
516 both1 = {'copy': {},
521 both1 = {'copy': {},
517 'fullcopy': {},
522 'fullcopy': {},
518 'incomplete': {},
523 'incomplete': {},
519 'diverge': bothdiverge,
524 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
525 'incompletediverge': bothincompletediverge
521 }
526 }
522 both2 = {'copy': {},
527 both2 = {'copy': {},
523 'fullcopy': {},
528 'fullcopy': {},
524 'incomplete': {},
529 'incomplete': {},
525 'diverge': bothdiverge,
530 'diverge': bothdiverge,
526 'incompletediverge': bothincompletediverge
531 'incompletediverge': bothincompletediverge
527 }
532 }
528 for f in bothnew:
533 for f in bothnew:
529 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
534 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
530 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
535 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
531 if dirtyc1:
536 if dirtyc1:
532 # incomplete copies may only be found on the "dirty" side for bothnew
537 # incomplete copies may only be found on the "dirty" side for bothnew
533 assert not both2['incomplete']
538 assert not both2['incomplete']
534 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
539 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
535 bothincompletediverge)
540 bothincompletediverge)
536 elif dirtyc2:
541 elif dirtyc2:
537 assert not both1['incomplete']
542 assert not both1['incomplete']
538 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
543 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
539 bothincompletediverge)
544 bothincompletediverge)
540 else:
545 else:
541 # incomplete copies and divergences can't happen outside grafts
546 # incomplete copies and divergences can't happen outside grafts
542 assert not both1['incomplete']
547 assert not both1['incomplete']
543 assert not both2['incomplete']
548 assert not both2['incomplete']
544 assert not bothincompletediverge
549 assert not bothincompletediverge
545 for f in remainder:
550 for f in remainder:
546 assert f not in bothdiverge
551 assert f not in bothdiverge
547 ic = remainder[f]
552 ic = remainder[f]
548 if ic[0] in (m1 if dirtyc1 else m2):
553 if ic[0] in (m1 if dirtyc1 else m2):
549 # backed-out rename on one side, but watch out for deleted files
554 # backed-out rename on one side, but watch out for deleted files
550 bothdiverge[f] = ic
555 bothdiverge[f] = ic
551 for of, fl in bothdiverge.items():
556 for of, fl in bothdiverge.items():
552 if len(fl) == 2 and fl[0] == fl[1]:
557 if len(fl) == 2 and fl[0] == fl[1]:
553 copy[fl[0]] = of # not actually divergent, just matching renames
558 copy[fl[0]] = of # not actually divergent, just matching renames
554
559
555 if fullcopy and repo.ui.debugflag:
560 if fullcopy and repo.ui.debugflag:
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
561 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 "% = renamed and deleted):\n")
562 "% = renamed and deleted):\n")
558 for f in sorted(fullcopy):
563 for f in sorted(fullcopy):
559 note = ""
564 note = ""
560 if f in copy:
565 if f in copy:
561 note += "*"
566 note += "*"
562 if f in divergeset:
567 if f in divergeset:
563 note += "!"
568 note += "!"
564 if f in renamedeleteset:
569 if f in renamedeleteset:
565 note += "%"
570 note += "%"
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
571 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 note))
572 note))
568 del divergeset
573 del divergeset
569
574
570 if not fullcopy:
575 if not fullcopy:
571 return copy, {}, diverge, renamedelete, {}
576 return copy, {}, diverge, renamedelete, {}
572
577
573 repo.ui.debug(" checking for directory renames\n")
578 repo.ui.debug(" checking for directory renames\n")
574
579
575 # generate a directory move map
580 # generate a directory move map
576 d1, d2 = c1.dirs(), c2.dirs()
581 d1, d2 = c1.dirs(), c2.dirs()
577 # Hack for adding '', which is not otherwise added, to d1 and d2
582 # Hack for adding '', which is not otherwise added, to d1 and d2
578 d1.addpath('/')
583 d1.addpath('/')
579 d2.addpath('/')
584 d2.addpath('/')
580 invalid = set()
585 invalid = set()
581 dirmove = {}
586 dirmove = {}
582
587
583 # examine each file copy for a potential directory move, which is
588 # examine each file copy for a potential directory move, which is
584 # when all the files in a directory are moved to a new directory
589 # when all the files in a directory are moved to a new directory
585 for dst, src in fullcopy.iteritems():
590 for dst, src in fullcopy.iteritems():
586 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
591 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
587 if dsrc in invalid:
592 if dsrc in invalid:
588 # already seen to be uninteresting
593 # already seen to be uninteresting
589 continue
594 continue
590 elif dsrc in d1 and ddst in d1:
595 elif dsrc in d1 and ddst in d1:
591 # directory wasn't entirely moved locally
596 # directory wasn't entirely moved locally
592 invalid.add(dsrc + "/")
597 invalid.add(dsrc + "/")
593 elif dsrc in d2 and ddst in d2:
598 elif dsrc in d2 and ddst in d2:
594 # directory wasn't entirely moved remotely
599 # directory wasn't entirely moved remotely
595 invalid.add(dsrc + "/")
600 invalid.add(dsrc + "/")
596 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
601 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
597 # files from the same directory moved to two different places
602 # files from the same directory moved to two different places
598 invalid.add(dsrc + "/")
603 invalid.add(dsrc + "/")
599 else:
604 else:
600 # looks good so far
605 # looks good so far
601 dirmove[dsrc + "/"] = ddst + "/"
606 dirmove[dsrc + "/"] = ddst + "/"
602
607
603 for i in invalid:
608 for i in invalid:
604 if i in dirmove:
609 if i in dirmove:
605 del dirmove[i]
610 del dirmove[i]
606 del d1, d2, invalid
611 del d1, d2, invalid
607
612
608 if not dirmove:
613 if not dirmove:
609 return copy, {}, diverge, renamedelete, {}
614 return copy, {}, diverge, renamedelete, {}
610
615
611 for d in dirmove:
616 for d in dirmove:
612 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
617 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
613 (d, dirmove[d]))
618 (d, dirmove[d]))
614
619
615 movewithdir = {}
620 movewithdir = {}
616 # check unaccounted nonoverlapping files against directory moves
621 # check unaccounted nonoverlapping files against directory moves
617 for f in u1r + u2r:
622 for f in u1r + u2r:
618 if f not in fullcopy:
623 if f not in fullcopy:
619 for d in dirmove:
624 for d in dirmove:
620 if f.startswith(d):
625 if f.startswith(d):
621 # new file added in a directory that was moved, move it
626 # new file added in a directory that was moved, move it
622 df = dirmove[d] + f[len(d):]
627 df = dirmove[d] + f[len(d):]
623 if df not in copy:
628 if df not in copy:
624 movewithdir[f] = df
629 movewithdir[f] = df
625 repo.ui.debug((" pending file src: '%s' -> "
630 repo.ui.debug((" pending file src: '%s' -> "
626 "dst: '%s'\n") % (f, df))
631 "dst: '%s'\n") % (f, df))
627 break
632 break
628
633
629 return copy, movewithdir, diverge, renamedelete, dirmove
634 return copy, movewithdir, diverge, renamedelete, dirmove
630
635
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # wdir has no rev; fall back to its first parent for revnum-based checks
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # walk the linear chain c2 -> base, collecting every touched file;
    # any merge commit invalidates the heuristic's assumptions
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
                          changedfiles)

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index files added on the c1 side by basename and by dirname, the
        # two axes along which the heuristic looks for rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        # we can have a lot of candidates which can slow down the heuristics;
        # config value to limit the number of candidate moves to check.
        # Loop-invariant: read it once instead of once per missing file.
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
734
739
735 def _related(f1, f2, limit):
740 def _related(f1, f2, limit):
736 """return True if f1 and f2 filectx have a common ancestor
741 """return True if f1 and f2 filectx have a common ancestor
737
742
738 Walk back to common ancestor to see if the two files originate
743 Walk back to common ancestor to see if the two files originate
739 from the same file. Since workingfilectx's rev() is None it messes
744 from the same file. Since workingfilectx's rev() is None it messes
740 up the integer comparison logic, hence the pre-step check for
745 up the integer comparison logic, hence the pre-step check for
741 None (f1 and f2 can only be workingfilectx's initially).
746 None (f1 and f2 can only be workingfilectx's initially).
742 """
747 """
743
748
744 if f1 == f2:
749 if f1 == f2:
745 return f1 # a match
750 return f1 # a match
746
751
747 g1, g2 = f1.ancestors(), f2.ancestors()
752 g1, g2 = f1.ancestors(), f2.ancestors()
748 try:
753 try:
749 f1r, f2r = f1.linkrev(), f2.linkrev()
754 f1r, f2r = f1.linkrev(), f2.linkrev()
750
755
751 if f1r is None:
756 if f1r is None:
752 f1 = next(g1)
757 f1 = next(g1)
753 if f2r is None:
758 if f2r is None:
754 f2 = next(g2)
759 f2 = next(g2)
755
760
756 while True:
761 while True:
757 f1r, f2r = f1.linkrev(), f2.linkrev()
762 f1r, f2r = f1.linkrev(), f2.linkrev()
758 if f1r > f2r:
763 if f1r > f2r:
759 f1 = next(g1)
764 f1 = next(g1)
760 elif f2r > f1r:
765 elif f2r > f1r:
761 f2 = next(g2)
766 f2 = next(g2)
762 elif f1 == f2:
767 elif f1 == f2:
763 return f1 # a match
768 return f1 # a match
764 elif f1r == f2r or f1r < limit or f2r < limit:
769 elif f1r == f2r or f1r < limit or f2r < limit:
765 return False # copy no longer relevant
770 return False # copy no longer relevant
766 except StopIteration:
771 except StopIteration:
767 return False
772 return False
768
773
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # filenames already visited along this rename chain; seeds with f so a
    # chain that loops back onto an already-seen name is cut off below
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # the rename chain was exhausted without a complete match; if the last
    # name reached exists in the topological CA, record a partial result so
    # mergecopies() can combine the two half-chains later
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
862
867
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    skipped = {}
    if skiprev is not None:
        # copytrace='off' skips only this lookup, not the whole function:
        # computing it is O(size of the repo) during a rebase, while the
        # loop below is much faster (and is still required to carry copy
        # metadata across the rebase anyway).
        if repo.ui.config('experimental', 'copytrace') != 'off':
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in skipped:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now