##// END OF EJS Templates
copies: inline a variable that's used only once...
Martin von Zweigbergk -
r39981:85c8ff26 default
parent child Browse files
Show More
@@ -1,893 +1,892 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    repo - the repository (provides changelog and dirstate)
    a, b - revision numbers; None stands for the working directory
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[rev] is -1 (only reaches a), 1 (only reaches b) or 0 (reaches both)
    side = {a: -1, b: 1}
    # max-heap of revisions still to visit (negated for heapq's min-heap)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111
111
112 def _chain(src, dst, a, b):
112 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
113 """chain two sets of copies a->b"""
114 t = a.copy()
114 t = a.copy()
115 for k, v in b.iteritems():
115 for k, v in b.iteritems():
116 if v in t:
116 if v in t:
117 # found a chain
117 # found a chain
118 if t[v] != k:
118 if t[v] != k:
119 # file wasn't renamed back to itself
119 # file wasn't renamed back to itself
120 t[k] = t[v]
120 t[k] = t[v]
121 if v not in dst:
121 if v not in dst:
122 # chain was a rename, not a copy
122 # chain was a rename, not a copy
123 del t[v]
123 del t[v]
124 if v in src:
124 if v in src:
125 # file is a copy of an existing file
125 # file is a copy of an existing file
126 t[k] = v
126 t[k] = v
127
127
128 # remove criss-crossed copies
128 # remove criss-crossed copies
129 for k, v in list(t.items()):
129 for k, v in list(t.items()):
130 if k in src and v in dst:
130 if k in src and v in dst:
131 del t[k]
131 del t[k]
132
132
133 return t
133 return t
134
134
135 def _tracefile(fctx, am, limit=-1):
135 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
136 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
137 manifest am, stopping after the first ancestor lower than limit"""
138
138
139 for f in fctx.ancestors():
139 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
140 if am.get(f.path(), None) == f.filenode():
141 return f
141 return f
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 return None
143 return None
144
144
145 def _dirstatecopies(d, match=None):
145 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
146 ds = d._repo.dirstate
147 c = ds.copies().copy()
147 c = ds.copies().copy()
148 for k in list(c):
148 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
149 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
150 del c[k]
151 return c
151 return c
152
152
153 def _computeforwardmissing(a, b, match=None):
153 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
154 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
155 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
156 files _forwardcopies is about to process.
157 """
157 """
158 ma = a.manifest()
158 ma = a.manifest()
159 mb = b.manifest()
159 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
160 return mb.filesnotin(ma, match=match)
161
161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # We only find where new files came from; finding where old files went
    # would be too expensive, so a case like 'hg rm b; hg cp a b' can be
    # missed.
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. When b is a non-merge child of
    # a (the common rebase/histedit case) we can restrict the comparison to
    # the files b touched. Merge commits are excluded because ctx.files()
    # is not correct for them.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    newfiles = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep the
    # linkrev adjustment cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in newfiles:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        srcfctx = _tracefile(fctx, am, limit)
        if srcfctx:
            copies[f] = srcfctx.path()
    return copies
196
196
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if b.rev() is not None:
        # both endpoints are committed revisions
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    committed = _committedforwardcopies(a, b.p1(), match)
    # combine copies from dirstate if necessary
    return _chain(a, b, committed, _dirstatecopies(b, match))
210
210
def _backwardrenames(a, b):
    """Reverse _forwardcopies(b, a): map each name in b to its renamed name
    in a, keeping renames only (copies are dropped)."""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename
    # situations can still exist (e.g. hg cp a b; hg mv a c). In those
    # cases we arbitrarily pick one of the renames; sorting first makes the
    # pick deterministic.
    renames = {}
    for newname, oldname in sorted(_forwardcopies(b, a).iteritems()):
        if oldname in a:
            # the source still exists in a, so this was a copy, not a
            # rename; remove it
            continue
        renames[oldname] = newname
    return renames
226
226
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forward only
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: trace backward only
        return _backwardrenames(x, y)
    # x and y diverge: go back from x to the common ancestor, forward from
    # there to y, and chain the two copy maps
    backward = _backwardrenames(x, anc)
    forward = _forwardcopies(anc, y, match=match)
    return _chain(x, y, backward, forward)
238
238
239 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
239 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
240 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
240 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
241 and c2. This is its own function so extensions can easily wrap this call
241 and c2. This is its own function so extensions can easily wrap this call
242 to see what files mergecopies is about to process.
242 to see what files mergecopies is about to process.
243
243
244 Even though c1 and c2 are not used in this function, they are useful in
244 Even though c1 and c2 are not used in this function, they are useful in
245 other extensions for being able to read the file nodes of the changed files.
245 other extensions for being able to read the file nodes of the changed files.
246
246
247 "baselabel" can be passed to help distinguish the multiple computations
247 "baselabel" can be passed to help distinguish the multiple computations
248 done in the graft case.
248 done in the graft case.
249 """
249 """
250 u1 = sorted(addedinm1 - addedinm2)
250 u1 = sorted(addedinm1 - addedinm2)
251 u2 = sorted(addedinm2 - addedinm1)
251 u2 = sorted(addedinm2 - addedinm1)
252
252
253 header = " unmatched files in %s"
253 header = " unmatched files in %s"
254 if baselabel:
254 if baselabel:
255 header += ' (from %s)' % baselabel
255 header += ' (from %s)' % baselabel
256 if u1:
256 if u1:
257 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
257 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
258 if u2:
258 if u2:
259 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
259 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
260
260
261 narrowmatch = repo.narrowmatch()
261 narrowmatch = repo.narrowmatch()
262 if not narrowmatch.always():
262 if not narrowmatch.always():
263 u1 = [f for f in u1 if narrowmatch(f)]
263 u1 = [f for f in u1 if narrowmatch(f)]
264 u2 = [f for f in u2 if narrowmatch(f)]
264 u2 = [f for f in u2 if narrowmatch(f)]
265 return u1, u2
265 return u1, u2
266
266
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    # reuse an ancestry context already attached to ctx, if any
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: anchor the ancestry on its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        # cache on ctx so subsequent calls share the same context
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # build the filectx for file f at filenode n
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair may be requested many times
    return util.lrucachefunc(makectx)
301
301
302 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
302 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
303 """combine partial copy paths"""
303 """combine partial copy paths"""
304 remainder = {}
304 remainder = {}
305 for f in copyfrom:
305 for f in copyfrom:
306 if f in copyto:
306 if f in copyto:
307 finalcopy[copyto[f]] = copyfrom[f]
307 finalcopy[copyto[f]] = copyfrom[f]
308 del copyto[f]
308 del copyto[f]
309 for f in incompletediverge:
309 for f in incompletediverge:
310 assert f not in diverge
310 assert f not in diverge
311 ic = incompletediverge[f]
311 ic = incompletediverge[f]
312 if ic[0] in copyto:
312 if ic[0] in copyto:
313 diverge[f] = [copyto[ic[0]], ic[1]]
313 diverge[f] = [copyto[ic[0]], ic[1]]
314 else:
314 else:
315 remainder[f] = ic
315 remainder[f] = ic
316 return remainder
316 return remainder
317
317
def mergecopies(repo, c1, c2, base):
    """Find moves and copies between context c1 and c2 that are relevant for
    merging, dispatching to the copytracing algorithm selected by the
    'experimental.copytrace' config. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to
    merge files that were moved/copied in one merge parent and modified in
    another. For example:

     o          ---> 4 another commit
     |
     |   o      ---> 3 commit that modifies a.txt
     |  /
     o /        ---> 2 commit that moves a.txt to b.txt
     |/
     o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" maps destination name -> source name, where source is in c1 and
    destination is in c2 or vice-versa.

    "movewithdir" maps source name -> destination name, where the file at
    source present in one context but not the other needs to be moved to
    destination by the merge process, because the other context moved the
    directory it is in.

    "diverge" maps source name -> list of destination names for divergent
    renames.

    "renamedelete" maps source name -> list of destination names for files
    deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" maps detected source dir -> destination dir renames. This is
    needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept
    # across a rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which
        # could be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse
        # the value, so copytracing stays enabled in that case
        return {}, {}, {}, {}, {}
    return _fullcopytracing(repo, c1, c2, base)
390
390
391 def _isfullcopytraceable(repo, c1, base):
391 def _isfullcopytraceable(repo, c1, base):
392 """ Checks that if base, source and destination are all no-public branches,
392 """ Checks that if base, source and destination are all no-public branches,
393 if yes let's use the full copytrace algorithm for increased capabilities
393 if yes let's use the full copytrace algorithm for increased capabilities
394 since it will be fast enough.
394 since it will be fast enough.
395
395
396 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
396 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
397 number of changesets from c1 to base such that if number of changesets are
397 number of changesets from c1 to base such that if number of changesets are
398 more than the limit, full copytracing algorithm won't be used.
398 more than the limit, full copytracing algorithm won't be used.
399 """
399 """
400 if c1.rev() is None:
400 if c1.rev() is None:
401 c1 = c1.p1()
401 c1 = c1.p1()
402 if c1.mutable() and base.mutable():
402 if c1.mutable() and base.mutable():
403 sourcecommitlimit = repo.ui.configint('experimental',
403 sourcecommitlimit = repo.ui.configint('experimental',
404 'copytrace.sourcecommitlimit')
404 'copytrace.sourcecommitlimit')
405 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
405 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
406 return commits < sourcecommitlimit
406 return commits < sourcecommitlimit
407 return False
407 return False
408
408
409 def _fullcopytracing(repo, c1, c2, base):
409 def _fullcopytracing(repo, c1, c2, base):
410 """ The full copytracing algorithm which finds all the new files that were
410 """ The full copytracing algorithm which finds all the new files that were
411 added from merge base up to the top commit and for each file it checks if
411 added from merge base up to the top commit and for each file it checks if
412 this file was copied from another file.
412 this file was copied from another file.
413
413
414 This is pretty slow when a lot of changesets are involved but will track all
414 This is pretty slow when a lot of changesets are involved but will track all
415 the copies.
415 the copies.
416 """
416 """
417 # In certain scenarios (e.g. graft, update or rebase), base can be
417 # In certain scenarios (e.g. graft, update or rebase), base can be
418 # overridden We still need to know a real common ancestor in this case We
418 # overridden We still need to know a real common ancestor in this case We
419 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
419 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
420 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
420 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
421 # caller may not know if the revision passed in lieu of the CA is a genuine
421 # caller may not know if the revision passed in lieu of the CA is a genuine
422 # common ancestor or not without explicitly checking it, it's better to
422 # common ancestor or not without explicitly checking it, it's better to
423 # determine that here.
423 # determine that here.
424 #
424 #
425 # base.isancestorof(wc) is False, work around that
425 # base.isancestorof(wc) is False, work around that
426 _c1 = c1.p1() if c1.rev() is None else c1
426 _c1 = c1.p1() if c1.rev() is None else c1
427 _c2 = c2.p1() if c2.rev() is None else c2
427 _c2 = c2.p1() if c2.rev() is None else c2
428 # an endpoint is "dirty" if it isn't a descendant of the merge base
428 # an endpoint is "dirty" if it isn't a descendant of the merge base
429 # if we have a dirty endpoint, we need to trigger graft logic, and also
429 # if we have a dirty endpoint, we need to trigger graft logic, and also
430 # keep track of which endpoint is dirty
430 # keep track of which endpoint is dirty
431 dirtyc1 = not base.isancestorof(_c1)
431 dirtyc1 = not base.isancestorof(_c1)
432 dirtyc2 = not base.isancestorof(_c2)
432 dirtyc2 = not base.isancestorof(_c2)
433 graft = dirtyc1 or dirtyc2
433 graft = dirtyc1 or dirtyc2
434 tca = base
434 tca = base
435 if graft:
435 if graft:
436 tca = _c1.ancestor(_c2)
436 tca = _c1.ancestor(_c2)
437
437
438 limit = _findlimit(repo, c1.rev(), c2.rev())
438 limit = _findlimit(repo, c1.rev(), c2.rev())
439 if limit is None:
439 if limit is None:
440 # no common ancestor, no copies
440 # no common ancestor, no copies
441 return {}, {}, {}, {}, {}
441 return {}, {}, {}, {}, {}
442 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
442 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
443
443
444 m1 = c1.manifest()
444 m1 = c1.manifest()
445 m2 = c2.manifest()
445 m2 = c2.manifest()
446 mb = base.manifest()
446 mb = base.manifest()
447
447
448 # gather data from _checkcopies:
448 # gather data from _checkcopies:
449 # - diverge = record all diverges in this dict
449 # - diverge = record all diverges in this dict
450 # - copy = record all non-divergent copies in this dict
450 # - copy = record all non-divergent copies in this dict
451 # - fullcopy = record all copies in this dict
451 # - fullcopy = record all copies in this dict
452 # - incomplete = record non-divergent partial copies here
452 # - incomplete = record non-divergent partial copies here
453 # - incompletediverge = record divergent partial copies here
453 # - incompletediverge = record divergent partial copies here
454 diverge = {} # divergence data is shared
454 diverge = {} # divergence data is shared
455 incompletediverge = {}
455 incompletediverge = {}
456 data1 = {'copy': {},
456 data1 = {'copy': {},
457 'fullcopy': {},
457 'fullcopy': {},
458 'incomplete': {},
458 'incomplete': {},
459 'diverge': diverge,
459 'diverge': diverge,
460 'incompletediverge': incompletediverge,
460 'incompletediverge': incompletediverge,
461 }
461 }
462 data2 = {'copy': {},
462 data2 = {'copy': {},
463 'fullcopy': {},
463 'fullcopy': {},
464 'incomplete': {},
464 'incomplete': {},
465 'diverge': diverge,
465 'diverge': diverge,
466 'incompletediverge': incompletediverge,
466 'incompletediverge': incompletediverge,
467 }
467 }
468
468
469 # find interesting file sets from manifests
469 # find interesting file sets from manifests
470 addedinm1 = m1.filesnotin(mb)
470 addedinm1 = m1.filesnotin(mb)
471 addedinm2 = m2.filesnotin(mb)
471 addedinm2 = m2.filesnotin(mb)
472 bothnew = sorted(addedinm1 & addedinm2)
472 bothnew = sorted(addedinm1 & addedinm2)
473 if tca == base:
473 if tca == base:
474 # unmatched file from base
474 # unmatched file from base
475 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
475 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
476 u1u, u2u = u1r, u2r
476 u1u, u2u = u1r, u2r
477 else:
477 else:
478 # unmatched file from base (DAG rotation in the graft case)
478 # unmatched file from base (DAG rotation in the graft case)
479 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
479 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
480 baselabel='base')
480 baselabel='base')
481 # unmatched file from topological common ancestors (no DAG rotation)
481 # unmatched file from topological common ancestors (no DAG rotation)
482 # need to recompute this for directory move handling when grafting
482 # need to recompute this for directory move handling when grafting
483 mta = tca.manifest()
483 mta = tca.manifest()
484 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
484 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
485 m2.filesnotin(mta),
485 m2.filesnotin(mta),
486 baselabel='topological common ancestor')
486 baselabel='topological common ancestor')
487
487
488 for f in u1u:
488 for f in u1u:
489 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
489 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
490
490
491 for f in u2u:
491 for f in u2u:
492 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
492 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
493
493
494 copy = dict(data1['copy'])
494 copy = dict(data1['copy'])
495 copy.update(data2['copy'])
495 copy.update(data2['copy'])
496 fullcopy = dict(data1['fullcopy'])
496 fullcopy = dict(data1['fullcopy'])
497 fullcopy.update(data2['fullcopy'])
497 fullcopy.update(data2['fullcopy'])
498
498
499 if dirtyc1:
499 if dirtyc1:
500 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
500 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
501 incompletediverge)
501 incompletediverge)
502 else:
502 else:
503 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
503 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
504 incompletediverge)
504 incompletediverge)
505
505
506 renamedelete = {}
506 renamedelete = {}
507 renamedeleteset = set()
507 renamedeleteset = set()
508 divergeset = set()
508 divergeset = set()
509 for of, fl in list(diverge.items()):
509 for of, fl in list(diverge.items()):
510 if len(fl) == 1 or of in c1 or of in c2:
510 if len(fl) == 1 or of in c1 or of in c2:
511 del diverge[of] # not actually divergent, or not a rename
511 del diverge[of] # not actually divergent, or not a rename
512 if of not in c1 and of not in c2:
512 if of not in c1 and of not in c2:
513 # renamed on one side, deleted on the other side, but filter
513 # renamed on one side, deleted on the other side, but filter
514 # out files that have been renamed and then deleted
514 # out files that have been renamed and then deleted
515 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
515 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
516 renamedeleteset.update(fl) # reverse map for below
516 renamedeleteset.update(fl) # reverse map for below
517 else:
517 else:
518 divergeset.update(fl) # reverse map for below
518 divergeset.update(fl) # reverse map for below
519
519
520 if bothnew:
520 if bothnew:
521 repo.ui.debug(" unmatched files new in both:\n %s\n"
521 repo.ui.debug(" unmatched files new in both:\n %s\n"
522 % "\n ".join(bothnew))
522 % "\n ".join(bothnew))
523 bothdiverge = {}
523 bothdiverge = {}
524 bothincompletediverge = {}
524 bothincompletediverge = {}
525 remainder = {}
525 remainder = {}
526 both1 = {'copy': {},
526 both1 = {'copy': {},
527 'fullcopy': {},
527 'fullcopy': {},
528 'incomplete': {},
528 'incomplete': {},
529 'diverge': bothdiverge,
529 'diverge': bothdiverge,
530 'incompletediverge': bothincompletediverge
530 'incompletediverge': bothincompletediverge
531 }
531 }
532 both2 = {'copy': {},
532 both2 = {'copy': {},
533 'fullcopy': {},
533 'fullcopy': {},
534 'incomplete': {},
534 'incomplete': {},
535 'diverge': bothdiverge,
535 'diverge': bothdiverge,
536 'incompletediverge': bothincompletediverge
536 'incompletediverge': bothincompletediverge
537 }
537 }
538 for f in bothnew:
538 for f in bothnew:
539 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
539 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
540 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
540 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
541 if dirtyc1:
541 if dirtyc1:
542 # incomplete copies may only be found on the "dirty" side for bothnew
542 # incomplete copies may only be found on the "dirty" side for bothnew
543 assert not both2['incomplete']
543 assert not both2['incomplete']
544 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
544 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
545 bothincompletediverge)
545 bothincompletediverge)
546 elif dirtyc2:
546 elif dirtyc2:
547 assert not both1['incomplete']
547 assert not both1['incomplete']
548 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
548 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
549 bothincompletediverge)
549 bothincompletediverge)
550 else:
550 else:
551 # incomplete copies and divergences can't happen outside grafts
551 # incomplete copies and divergences can't happen outside grafts
552 assert not both1['incomplete']
552 assert not both1['incomplete']
553 assert not both2['incomplete']
553 assert not both2['incomplete']
554 assert not bothincompletediverge
554 assert not bothincompletediverge
555 for f in remainder:
555 for f in remainder:
556 assert f not in bothdiverge
556 assert f not in bothdiverge
557 ic = remainder[f]
557 ic = remainder[f]
558 if ic[0] in (m1 if dirtyc1 else m2):
558 if ic[0] in (m1 if dirtyc1 else m2):
559 # backed-out rename on one side, but watch out for deleted files
559 # backed-out rename on one side, but watch out for deleted files
560 bothdiverge[f] = ic
560 bothdiverge[f] = ic
561 for of, fl in bothdiverge.items():
561 for of, fl in bothdiverge.items():
562 if len(fl) == 2 and fl[0] == fl[1]:
562 if len(fl) == 2 and fl[0] == fl[1]:
563 copy[fl[0]] = of # not actually divergent, just matching renames
563 copy[fl[0]] = of # not actually divergent, just matching renames
564
564
565 if fullcopy and repo.ui.debugflag:
565 if fullcopy and repo.ui.debugflag:
566 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
566 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
567 "% = renamed and deleted):\n")
567 "% = renamed and deleted):\n")
568 for f in sorted(fullcopy):
568 for f in sorted(fullcopy):
569 note = ""
569 note = ""
570 if f in copy:
570 if f in copy:
571 note += "*"
571 note += "*"
572 if f in divergeset:
572 if f in divergeset:
573 note += "!"
573 note += "!"
574 if f in renamedeleteset:
574 if f in renamedeleteset:
575 note += "%"
575 note += "%"
576 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
576 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
577 note))
577 note))
578 del divergeset
578 del divergeset
579
579
580 if not fullcopy:
580 if not fullcopy:
581 return copy, {}, diverge, renamedelete, {}
581 return copy, {}, diverge, renamedelete, {}
582
582
583 repo.ui.debug(" checking for directory renames\n")
583 repo.ui.debug(" checking for directory renames\n")
584
584
585 # generate a directory move map
585 # generate a directory move map
586 d1, d2 = c1.dirs(), c2.dirs()
586 d1, d2 = c1.dirs(), c2.dirs()
587 # Hack for adding '', which is not otherwise added, to d1 and d2
587 # Hack for adding '', which is not otherwise added, to d1 and d2
588 d1.addpath('/')
588 d1.addpath('/')
589 d2.addpath('/')
589 d2.addpath('/')
590 invalid = set()
590 invalid = set()
591 dirmove = {}
591 dirmove = {}
592
592
593 # examine each file copy for a potential directory move, which is
593 # examine each file copy for a potential directory move, which is
594 # when all the files in a directory are moved to a new directory
594 # when all the files in a directory are moved to a new directory
595 for dst, src in fullcopy.iteritems():
595 for dst, src in fullcopy.iteritems():
596 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
596 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
597 if dsrc in invalid:
597 if dsrc in invalid:
598 # already seen to be uninteresting
598 # already seen to be uninteresting
599 continue
599 continue
600 elif dsrc in d1 and ddst in d1:
600 elif dsrc in d1 and ddst in d1:
601 # directory wasn't entirely moved locally
601 # directory wasn't entirely moved locally
602 invalid.add(dsrc)
602 invalid.add(dsrc)
603 elif dsrc in d2 and ddst in d2:
603 elif dsrc in d2 and ddst in d2:
604 # directory wasn't entirely moved remotely
604 # directory wasn't entirely moved remotely
605 invalid.add(dsrc)
605 invalid.add(dsrc)
606 elif dsrc in dirmove and dirmove[dsrc] != ddst:
606 elif dsrc in dirmove and dirmove[dsrc] != ddst:
607 # files from the same directory moved to two different places
607 # files from the same directory moved to two different places
608 invalid.add(dsrc)
608 invalid.add(dsrc)
609 else:
609 else:
610 # looks good so far
610 # looks good so far
611 dirmove[dsrc] = ddst
611 dirmove[dsrc] = ddst
612
612
613 for i in invalid:
613 for i in invalid:
614 if i in dirmove:
614 if i in dirmove:
615 del dirmove[i]
615 del dirmove[i]
616 del d1, d2, invalid
616 del d1, d2, invalid
617
617
618 if not dirmove:
618 if not dirmove:
619 return copy, {}, diverge, renamedelete, {}
619 return copy, {}, diverge, renamedelete, {}
620
620
621 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
621 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
622
622
623 for d in dirmove:
623 for d in dirmove:
624 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
624 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
625 (d, dirmove[d]))
625 (d, dirmove[d]))
626
626
627 movewithdir = {}
627 movewithdir = {}
628 # check unaccounted nonoverlapping files against directory moves
628 # check unaccounted nonoverlapping files against directory moves
629 for f in u1r + u2r:
629 for f in u1r + u2r:
630 if f not in fullcopy:
630 if f not in fullcopy:
631 for d in dirmove:
631 for d in dirmove:
632 if f.startswith(d):
632 if f.startswith(d):
633 # new file added in a directory that was moved, move it
633 # new file added in a directory that was moved, move it
634 df = dirmove[d] + f[len(d):]
634 df = dirmove[d] + f[len(d):]
635 if df not in copy:
635 if df not in copy:
636 movewithdir[f] = df
636 movewithdir[f] = df
637 repo.ui.debug((" pending file src: '%s' -> "
637 repo.ui.debug((" pending file src: '%s' -> "
638 "dst: '%s'\n") % (f, df))
638 "dst: '%s'\n") % (f, df))
639 break
639 break
640
640
641 return copy, movewithdir, diverge, renamedelete, dirmove
641 return copy, movewithdir, diverge, renamedelete, dirmove
642
642
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # work on committed revisions only: a workingctx has rev() is None, so
    # substitute its first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2; bail out to the full
    # algorithm as soon as a merge commit is seen on that path
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies already recorded between base and c2 whose source still exists
    # on the c1 side can be reused directly
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # (condition inlined instead of a named lambda, per PEP 8 E731)
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index files added on the c1 side since base by basename and by
        # dirname, so each missing file only inspects plausible candidates
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: read the config once, not per missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
743
743
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    anc1 = f1.ancestors()
    anc2 = f2.ancestors()
    try:
        # a workingfilectx reports linkrev None; step down to its first
        # committed ancestor so the integer comparisons below are valid
        if f1.linkrev() is None:
            f1 = next(anc1)
        if f2.linkrev() is None:
            f2 = next(anc2)

        # walk the higher-linkrev side down until both sides meet at the
        # same linkrev; exhausting either generator means no common origin
        while True:
            r1 = f1.linkrev()
            r2 = f2.linkrev()
            if r1 == r2:
                # same linkrev: related iff both point to the same file
                return f1 == f2
            if r1 > r2:
                f1 = next(anc1)
            else:
                f2 = next(anc2)
    except StopIteration:
        return False
775
775
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    # fctx factories for the two endpoints (caching helper defined elsewhere
    # in this module)
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # 'seen' tracks every path name already visited along f's ancestry so
    # rename chains that revisit a name terminate the walk
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence against the first visited name that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        # the candidate exists in the topological CA but was not matched on
        # the destination side: the copy chain is only partially resolved
        if of in mta:
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
869
868
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    cfg = repo.ui.config('experimental', 'copytrace')
    parsed = stringutil.parsebool(cfg)
    # copy tracing counts as enabled for 'heuristics', any truthy boolean
    # value, or when the setting is unparseable/unset (parsebool -> None)
    tracingenabled = cfg == 'heuristics' or parsed or parsed is None
    exclude = {}
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now