# Reconstructed from Mercurial changeset r34365:d00910b2 (branch: default)
# "copytrace: use ctx.mutable() instead of adhoc constant of non-public phases"
# Author: Yuya Nishihara
# Diff context: @@ -1,867 +1,863
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7
7
from __future__ import absolute_import

import collections
import heapq
import os

from . import (
    match as matchmod,
    node,
    pathutil,
    phases,
    scmutil,
    util,
)
22
21
23 def _findlimit(repo, a, b):
22 def _findlimit(repo, a, b):
24 """
23 """
25 Find the last revision that needs to be checked to ensure that a full
24 Find the last revision that needs to be checked to ensure that a full
26 transitive closure for file copies can be properly calculated.
25 transitive closure for file copies can be properly calculated.
27 Generally, this means finding the earliest revision number that's an
26 Generally, this means finding the earliest revision number that's an
28 ancestor of a or b but not both, except when a or b is a direct descendent
27 ancestor of a or b but not both, except when a or b is a direct descendent
29 of the other, in which case we can return the minimum revnum of a and b.
28 of the other, in which case we can return the minimum revnum of a and b.
30 None if no such revision exists.
29 None if no such revision exists.
31 """
30 """
32
31
33 # basic idea:
32 # basic idea:
34 # - mark a and b with different sides
33 # - mark a and b with different sides
35 # - if a parent's children are all on the same side, the parent is
34 # - if a parent's children are all on the same side, the parent is
36 # on that side, otherwise it is on no side
35 # on that side, otherwise it is on no side
37 # - walk the graph in topological order with the help of a heap;
36 # - walk the graph in topological order with the help of a heap;
38 # - add unseen parents to side map
37 # - add unseen parents to side map
39 # - clear side of any parent that has children on different sides
38 # - clear side of any parent that has children on different sides
40 # - track number of interesting revs that might still be on a side
39 # - track number of interesting revs that might still be on a side
41 # - track the lowest interesting rev seen
40 # - track the lowest interesting rev seen
42 # - quit when interesting revs is zero
41 # - quit when interesting revs is zero
43
42
44 cl = repo.changelog
43 cl = repo.changelog
45 working = len(cl) # pseudo rev for the working directory
44 working = len(cl) # pseudo rev for the working directory
46 if a is None:
45 if a is None:
47 a = working
46 a = working
48 if b is None:
47 if b is None:
49 b = working
48 b = working
50
49
51 side = {a: -1, b: 1}
50 side = {a: -1, b: 1}
52 visit = [-a, -b]
51 visit = [-a, -b]
53 heapq.heapify(visit)
52 heapq.heapify(visit)
54 interesting = len(visit)
53 interesting = len(visit)
55 hascommonancestor = False
54 hascommonancestor = False
56 limit = working
55 limit = working
57
56
58 while interesting:
57 while interesting:
59 r = -heapq.heappop(visit)
58 r = -heapq.heappop(visit)
60 if r == working:
59 if r == working:
61 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 else:
61 else:
63 parents = cl.parentrevs(r)
62 parents = cl.parentrevs(r)
64 for p in parents:
63 for p in parents:
65 if p < 0:
64 if p < 0:
66 continue
65 continue
67 if p not in side:
66 if p not in side:
68 # first time we see p; add it to visit
67 # first time we see p; add it to visit
69 side[p] = side[r]
68 side[p] = side[r]
70 if side[p]:
69 if side[p]:
71 interesting += 1
70 interesting += 1
72 heapq.heappush(visit, -p)
71 heapq.heappush(visit, -p)
73 elif side[p] and side[p] != side[r]:
72 elif side[p] and side[p] != side[r]:
74 # p was interesting but now we know better
73 # p was interesting but now we know better
75 side[p] = 0
74 side[p] = 0
76 interesting -= 1
75 interesting -= 1
77 hascommonancestor = True
76 hascommonancestor = True
78 if side[r]:
77 if side[r]:
79 limit = r # lowest rev visited
78 limit = r # lowest rev visited
80 interesting -= 1
79 interesting -= 1
81
80
82 if not hascommonancestor:
81 if not hascommonancestor:
83 return None
82 return None
84
83
85 # Consider the following flow (see test-commit-amend.t under issue4405):
84 # Consider the following flow (see test-commit-amend.t under issue4405):
86 # 1/ File 'a0' committed
85 # 1/ File 'a0' committed
87 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
88 # 3/ Move back to first commit
87 # 3/ Move back to first commit
89 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
90 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
91 #
90 #
92 # During the amend in step five, we will be in this state:
91 # During the amend in step five, we will be in this state:
93 #
92 #
94 # @ 3 temporary amend commit for a1-amend
93 # @ 3 temporary amend commit for a1-amend
95 # |
94 # |
96 # o 2 a1-amend
95 # o 2 a1-amend
97 # |
96 # |
98 # | o 1 a1
97 # | o 1 a1
99 # |/
98 # |/
100 # o 0 a0
99 # o 0 a0
101 #
100 #
102 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
103 # yet the filelog has the copy information in rev 1 and we will not look
102 # yet the filelog has the copy information in rev 1 and we will not look
104 # back far enough unless we also look at the a and b as candidates.
103 # back far enough unless we also look at the a and b as candidates.
105 # This only occurs when a is a descendent of b or visa-versa.
104 # This only occurs when a is a descendent of b or visa-versa.
106 return min(limit, a, b)
105 return min(limit, a, b)
107
106
108 def _chain(src, dst, a, b):
107 def _chain(src, dst, a, b):
109 '''chain two sets of copies a->b'''
108 '''chain two sets of copies a->b'''
110 t = a.copy()
109 t = a.copy()
111 for k, v in b.iteritems():
110 for k, v in b.iteritems():
112 if v in t:
111 if v in t:
113 # found a chain
112 # found a chain
114 if t[v] != k:
113 if t[v] != k:
115 # file wasn't renamed back to itself
114 # file wasn't renamed back to itself
116 t[k] = t[v]
115 t[k] = t[v]
117 if v not in dst:
116 if v not in dst:
118 # chain was a rename, not a copy
117 # chain was a rename, not a copy
119 del t[v]
118 del t[v]
120 if v in src:
119 if v in src:
121 # file is a copy of an existing file
120 # file is a copy of an existing file
122 t[k] = v
121 t[k] = v
123
122
124 # remove criss-crossed copies
123 # remove criss-crossed copies
125 for k, v in t.items():
124 for k, v in t.items():
126 if k in src and v in dst:
125 if k in src and v in dst:
127 del t[k]
126 del t[k]
128
127
129 return t
128 return t
130
129
131 def _tracefile(fctx, am, limit=-1):
130 def _tracefile(fctx, am, limit=-1):
132 '''return file context that is the ancestor of fctx present in ancestor
131 '''return file context that is the ancestor of fctx present in ancestor
133 manifest am, stopping after the first ancestor lower than limit'''
132 manifest am, stopping after the first ancestor lower than limit'''
134
133
135 for f in fctx.ancestors():
134 for f in fctx.ancestors():
136 if am.get(f.path(), None) == f.filenode():
135 if am.get(f.path(), None) == f.filenode():
137 return f
136 return f
138 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 return None
138 return None
140
139
141 def _dirstatecopies(d):
140 def _dirstatecopies(d):
142 ds = d._repo.dirstate
141 ds = d._repo.dirstate
143 c = ds.copies().copy()
142 c = ds.copies().copy()
144 for k in list(c):
143 for k in list(c):
145 if ds[k] not in 'anm':
144 if ds[k] not in 'anm':
146 del c[k]
145 del c[k]
147 return c
146 return c
148
147
149 def _computeforwardmissing(a, b, match=None):
148 def _computeforwardmissing(a, b, match=None):
150 """Computes which files are in b but not a.
149 """Computes which files are in b but not a.
151 This is its own function so extensions can easily wrap this call to see what
150 This is its own function so extensions can easily wrap this call to see what
152 files _forwardcopies is about to process.
151 files _forwardcopies is about to process.
153 """
152 """
154 ma = a.manifest()
153 ma = a.manifest()
155 mb = b.manifest()
154 mb = b.manifest()
156 return mb.filesnotin(ma, match=match)
155 return mb.filesnotin(ma, match=match)
157
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # check for working copy
    w = None
    if b.rev() is None:
        w = b
        b = w.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(w)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see issue4537)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if w is not None:
        cm = _chain(a, w, cm, _dirstatecopies(w))

    return cm
207
206
def _backwardrenames(a, b):
    '''find {src@b: dst@a} rename mapping where b is a descendant of a,
    by inverting the forward copies and dropping plain copies'''
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
223
222
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        # y descends from x: straightforward forward trace
        return _forwardcopies(x, y, match=match)
    if a == y:
        # x descends from y: invert the renames
        return _backwardrenames(x, y)
    # unrelated endpoints: go back to the common ancestor, then forward
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
235
234
236 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
237 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
238 and c2. This is its own function so extensions can easily wrap this call
237 and c2. This is its own function so extensions can easily wrap this call
239 to see what files mergecopies is about to process.
238 to see what files mergecopies is about to process.
240
239
241 Even though c1 and c2 are not used in this function, they are useful in
240 Even though c1 and c2 are not used in this function, they are useful in
242 other extensions for being able to read the file nodes of the changed files.
241 other extensions for being able to read the file nodes of the changed files.
243
242
244 "baselabel" can be passed to help distinguish the multiple computations
243 "baselabel" can be passed to help distinguish the multiple computations
245 done in the graft case.
244 done in the graft case.
246 """
245 """
247 u1 = sorted(addedinm1 - addedinm2)
246 u1 = sorted(addedinm1 - addedinm2)
248 u2 = sorted(addedinm2 - addedinm1)
247 u2 = sorted(addedinm2 - addedinm1)
249
248
250 header = " unmatched files in %s"
249 header = " unmatched files in %s"
251 if baselabel:
250 if baselabel:
252 header += ' (from %s)' % baselabel
251 header += ' (from %s)' % baselabel
253 if u1:
252 if u1:
254 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
255 if u2:
254 if u2:
256 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
257 return u1, u2
256 return u1, u2
258
257
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            # working context: use the parents' ancestry instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirnodes:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize so repeated (f, n) lookups are cheap
    return util.lrucachefunc(makectx)
293
292
294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 """combine partial copy paths"""
294 """combine partial copy paths"""
296 remainder = {}
295 remainder = {}
297 for f in copyfrom:
296 for f in copyfrom:
298 if f in copyto:
297 if f in copyto:
299 finalcopy[copyto[f]] = copyfrom[f]
298 finalcopy[copyto[f]] = copyfrom[f]
300 del copyto[f]
299 del copyto[f]
301 for f in incompletediverge:
300 for f in incompletediverge:
302 assert f not in diverge
301 assert f not in diverge
303 ic = incompletediverge[f]
302 ic = incompletediverge[f]
304 if ic[0] in copyto:
303 if ic[0] in copyto:
305 diverge[f] = [copyto[ic[0]], ic[1]]
304 diverge[f] = [copyto[ic[0]], ic[1]]
306 else:
305 else:
307 remainder[f] = ic
306 remainder[f] = ic
308 return remainder
307 return remainder
309
308
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
379
378
380 def _isfullcopytraceable(repo, c1, base):
379 def _isfullcopytraceable(repo, c1, base):
381 """ Checks that if base, source and destination are all draft branches, if
380 """ Checks that if base, source and destination are all no-public branches,
382 yes let's use the full copytrace algorithm for increased capabilities since
381 if yes let's use the full copytrace algorithm for increased capabilities
383 it will be fast enough.
382 since it will be fast enough.
384 """
383 """
385 if c1.rev() is None:
384 if c1.rev() is None:
386 c1 = c1.p1()
385 c1 = c1.p1()
387
386 if c1.mutable() and base.mutable():
388 nonpublicphases = set([phases.draft, phases.secret])
389
390 if (c1.phase() in nonpublicphases) and (base.phase() in nonpublicphases):
391 sourcecommitlimit = repo.ui.configint('experimental',
387 sourcecommitlimit = repo.ui.configint('experimental',
392 'copytrace.sourcecommitlimit')
388 'copytrace.sourcecommitlimit')
393 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
389 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
394 return commits < sourcecommitlimit
390 return commits < sourcecommitlimit
395 return False
391 return False
396
392
397 def _fullcopytracing(repo, c1, c2, base):
393 def _fullcopytracing(repo, c1, c2, base):
398 """ The full copytracing algorithm which finds all the new files that were
394 """ The full copytracing algorithm which finds all the new files that were
399 added from merge base up to the top commit and for each file it checks if
395 added from merge base up to the top commit and for each file it checks if
400 this file was copied from another file.
396 this file was copied from another file.
401
397
402 This is pretty slow when a lot of changesets are involved but will track all
398 This is pretty slow when a lot of changesets are involved but will track all
403 the copies.
399 the copies.
404 """
400 """
405 # In certain scenarios (e.g. graft, update or rebase), base can be
401 # In certain scenarios (e.g. graft, update or rebase), base can be
406 # overridden We still need to know a real common ancestor in this case We
402 # overridden We still need to know a real common ancestor in this case We
407 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
403 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
408 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
404 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
409 # caller may not know if the revision passed in lieu of the CA is a genuine
405 # caller may not know if the revision passed in lieu of the CA is a genuine
410 # common ancestor or not without explicitly checking it, it's better to
406 # common ancestor or not without explicitly checking it, it's better to
411 # determine that here.
407 # determine that here.
412 #
408 #
413 # base.descendant(wc) and base.descendant(base) are False, work around that
409 # base.descendant(wc) and base.descendant(base) are False, work around that
414 _c1 = c1.p1() if c1.rev() is None else c1
410 _c1 = c1.p1() if c1.rev() is None else c1
415 _c2 = c2.p1() if c2.rev() is None else c2
411 _c2 = c2.p1() if c2.rev() is None else c2
416 # an endpoint is "dirty" if it isn't a descendant of the merge base
412 # an endpoint is "dirty" if it isn't a descendant of the merge base
417 # if we have a dirty endpoint, we need to trigger graft logic, and also
413 # if we have a dirty endpoint, we need to trigger graft logic, and also
418 # keep track of which endpoint is dirty
414 # keep track of which endpoint is dirty
419 dirtyc1 = not (base == _c1 or base.descendant(_c1))
415 dirtyc1 = not (base == _c1 or base.descendant(_c1))
420 dirtyc2 = not (base == _c2 or base.descendant(_c2))
416 dirtyc2 = not (base == _c2 or base.descendant(_c2))
421 graft = dirtyc1 or dirtyc2
417 graft = dirtyc1 or dirtyc2
422 tca = base
418 tca = base
423 if graft:
419 if graft:
424 tca = _c1.ancestor(_c2)
420 tca = _c1.ancestor(_c2)
425
421
426 limit = _findlimit(repo, c1.rev(), c2.rev())
422 limit = _findlimit(repo, c1.rev(), c2.rev())
427 if limit is None:
423 if limit is None:
428 # no common ancestor, no copies
424 # no common ancestor, no copies
429 return {}, {}, {}, {}, {}
425 return {}, {}, {}, {}, {}
430 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
426 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
431
427
432 m1 = c1.manifest()
428 m1 = c1.manifest()
433 m2 = c2.manifest()
429 m2 = c2.manifest()
434 mb = base.manifest()
430 mb = base.manifest()
435
431
436 # gather data from _checkcopies:
432 # gather data from _checkcopies:
437 # - diverge = record all diverges in this dict
433 # - diverge = record all diverges in this dict
438 # - copy = record all non-divergent copies in this dict
434 # - copy = record all non-divergent copies in this dict
439 # - fullcopy = record all copies in this dict
435 # - fullcopy = record all copies in this dict
440 # - incomplete = record non-divergent partial copies here
436 # - incomplete = record non-divergent partial copies here
441 # - incompletediverge = record divergent partial copies here
437 # - incompletediverge = record divergent partial copies here
442 diverge = {} # divergence data is shared
438 diverge = {} # divergence data is shared
443 incompletediverge = {}
439 incompletediverge = {}
444 data1 = {'copy': {},
440 data1 = {'copy': {},
445 'fullcopy': {},
441 'fullcopy': {},
446 'incomplete': {},
442 'incomplete': {},
447 'diverge': diverge,
443 'diverge': diverge,
448 'incompletediverge': incompletediverge,
444 'incompletediverge': incompletediverge,
449 }
445 }
450 data2 = {'copy': {},
446 data2 = {'copy': {},
451 'fullcopy': {},
447 'fullcopy': {},
452 'incomplete': {},
448 'incomplete': {},
453 'diverge': diverge,
449 'diverge': diverge,
454 'incompletediverge': incompletediverge,
450 'incompletediverge': incompletediverge,
455 }
451 }
456
452
457 # find interesting file sets from manifests
453 # find interesting file sets from manifests
458 addedinm1 = m1.filesnotin(mb)
454 addedinm1 = m1.filesnotin(mb)
459 addedinm2 = m2.filesnotin(mb)
455 addedinm2 = m2.filesnotin(mb)
460 bothnew = sorted(addedinm1 & addedinm2)
456 bothnew = sorted(addedinm1 & addedinm2)
461 if tca == base:
457 if tca == base:
462 # unmatched file from base
458 # unmatched file from base
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
459 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
464 u1u, u2u = u1r, u2r
460 u1u, u2u = u1r, u2r
465 else:
461 else:
466 # unmatched file from base (DAG rotation in the graft case)
462 # unmatched file from base (DAG rotation in the graft case)
467 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
468 baselabel='base')
464 baselabel='base')
469 # unmatched file from topological common ancestors (no DAG rotation)
465 # unmatched file from topological common ancestors (no DAG rotation)
470 # need to recompute this for directory move handling when grafting
466 # need to recompute this for directory move handling when grafting
471 mta = tca.manifest()
467 mta = tca.manifest()
472 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
468 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
473 m2.filesnotin(mta),
469 m2.filesnotin(mta),
474 baselabel='topological common ancestor')
470 baselabel='topological common ancestor')
475
471
476 for f in u1u:
472 for f in u1u:
477 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
473 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
478
474
479 for f in u2u:
475 for f in u2u:
480 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
476 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
481
477
482 copy = dict(data1['copy'])
478 copy = dict(data1['copy'])
483 copy.update(data2['copy'])
479 copy.update(data2['copy'])
484 fullcopy = dict(data1['fullcopy'])
480 fullcopy = dict(data1['fullcopy'])
485 fullcopy.update(data2['fullcopy'])
481 fullcopy.update(data2['fullcopy'])
486
482
487 if dirtyc1:
483 if dirtyc1:
488 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
484 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
489 incompletediverge)
485 incompletediverge)
490 else:
486 else:
491 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
487 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
492 incompletediverge)
488 incompletediverge)
493
489
494 renamedelete = {}
490 renamedelete = {}
495 renamedeleteset = set()
491 renamedeleteset = set()
496 divergeset = set()
492 divergeset = set()
497 for of, fl in list(diverge.items()):
493 for of, fl in list(diverge.items()):
498 if len(fl) == 1 or of in c1 or of in c2:
494 if len(fl) == 1 or of in c1 or of in c2:
499 del diverge[of] # not actually divergent, or not a rename
495 del diverge[of] # not actually divergent, or not a rename
500 if of not in c1 and of not in c2:
496 if of not in c1 and of not in c2:
501 # renamed on one side, deleted on the other side, but filter
497 # renamed on one side, deleted on the other side, but filter
502 # out files that have been renamed and then deleted
498 # out files that have been renamed and then deleted
503 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
499 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
504 renamedeleteset.update(fl) # reverse map for below
500 renamedeleteset.update(fl) # reverse map for below
505 else:
501 else:
506 divergeset.update(fl) # reverse map for below
502 divergeset.update(fl) # reverse map for below
507
503
508 if bothnew:
504 if bothnew:
509 repo.ui.debug(" unmatched files new in both:\n %s\n"
505 repo.ui.debug(" unmatched files new in both:\n %s\n"
510 % "\n ".join(bothnew))
506 % "\n ".join(bothnew))
511 bothdiverge = {}
507 bothdiverge = {}
512 bothincompletediverge = {}
508 bothincompletediverge = {}
513 remainder = {}
509 remainder = {}
514 both1 = {'copy': {},
510 both1 = {'copy': {},
515 'fullcopy': {},
511 'fullcopy': {},
516 'incomplete': {},
512 'incomplete': {},
517 'diverge': bothdiverge,
513 'diverge': bothdiverge,
518 'incompletediverge': bothincompletediverge
514 'incompletediverge': bothincompletediverge
519 }
515 }
520 both2 = {'copy': {},
516 both2 = {'copy': {},
521 'fullcopy': {},
517 'fullcopy': {},
522 'incomplete': {},
518 'incomplete': {},
523 'diverge': bothdiverge,
519 'diverge': bothdiverge,
524 'incompletediverge': bothincompletediverge
520 'incompletediverge': bothincompletediverge
525 }
521 }
526 for f in bothnew:
522 for f in bothnew:
527 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
523 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
528 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
524 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
529 if dirtyc1:
525 if dirtyc1:
530 # incomplete copies may only be found on the "dirty" side for bothnew
526 # incomplete copies may only be found on the "dirty" side for bothnew
531 assert not both2['incomplete']
527 assert not both2['incomplete']
532 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
528 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
533 bothincompletediverge)
529 bothincompletediverge)
534 elif dirtyc2:
530 elif dirtyc2:
535 assert not both1['incomplete']
531 assert not both1['incomplete']
536 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
532 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
537 bothincompletediverge)
533 bothincompletediverge)
538 else:
534 else:
539 # incomplete copies and divergences can't happen outside grafts
535 # incomplete copies and divergences can't happen outside grafts
540 assert not both1['incomplete']
536 assert not both1['incomplete']
541 assert not both2['incomplete']
537 assert not both2['incomplete']
542 assert not bothincompletediverge
538 assert not bothincompletediverge
543 for f in remainder:
539 for f in remainder:
544 assert f not in bothdiverge
540 assert f not in bothdiverge
545 ic = remainder[f]
541 ic = remainder[f]
546 if ic[0] in (m1 if dirtyc1 else m2):
542 if ic[0] in (m1 if dirtyc1 else m2):
547 # backed-out rename on one side, but watch out for deleted files
543 # backed-out rename on one side, but watch out for deleted files
548 bothdiverge[f] = ic
544 bothdiverge[f] = ic
549 for of, fl in bothdiverge.items():
545 for of, fl in bothdiverge.items():
550 if len(fl) == 2 and fl[0] == fl[1]:
546 if len(fl) == 2 and fl[0] == fl[1]:
551 copy[fl[0]] = of # not actually divergent, just matching renames
547 copy[fl[0]] = of # not actually divergent, just matching renames
552
548
553 if fullcopy and repo.ui.debugflag:
549 if fullcopy and repo.ui.debugflag:
554 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
550 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
555 "% = renamed and deleted):\n")
551 "% = renamed and deleted):\n")
556 for f in sorted(fullcopy):
552 for f in sorted(fullcopy):
557 note = ""
553 note = ""
558 if f in copy:
554 if f in copy:
559 note += "*"
555 note += "*"
560 if f in divergeset:
556 if f in divergeset:
561 note += "!"
557 note += "!"
562 if f in renamedeleteset:
558 if f in renamedeleteset:
563 note += "%"
559 note += "%"
564 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
560 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
565 note))
561 note))
566 del divergeset
562 del divergeset
567
563
568 if not fullcopy:
564 if not fullcopy:
569 return copy, {}, diverge, renamedelete, {}
565 return copy, {}, diverge, renamedelete, {}
570
566
571 repo.ui.debug(" checking for directory renames\n")
567 repo.ui.debug(" checking for directory renames\n")
572
568
573 # generate a directory move map
569 # generate a directory move map
574 d1, d2 = c1.dirs(), c2.dirs()
570 d1, d2 = c1.dirs(), c2.dirs()
575 # Hack for adding '', which is not otherwise added, to d1 and d2
571 # Hack for adding '', which is not otherwise added, to d1 and d2
576 d1.addpath('/')
572 d1.addpath('/')
577 d2.addpath('/')
573 d2.addpath('/')
578 invalid = set()
574 invalid = set()
579 dirmove = {}
575 dirmove = {}
580
576
581 # examine each file copy for a potential directory move, which is
577 # examine each file copy for a potential directory move, which is
582 # when all the files in a directory are moved to a new directory
578 # when all the files in a directory are moved to a new directory
583 for dst, src in fullcopy.iteritems():
579 for dst, src in fullcopy.iteritems():
584 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
580 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
585 if dsrc in invalid:
581 if dsrc in invalid:
586 # already seen to be uninteresting
582 # already seen to be uninteresting
587 continue
583 continue
588 elif dsrc in d1 and ddst in d1:
584 elif dsrc in d1 and ddst in d1:
589 # directory wasn't entirely moved locally
585 # directory wasn't entirely moved locally
590 invalid.add(dsrc + "/")
586 invalid.add(dsrc + "/")
591 elif dsrc in d2 and ddst in d2:
587 elif dsrc in d2 and ddst in d2:
592 # directory wasn't entirely moved remotely
588 # directory wasn't entirely moved remotely
593 invalid.add(dsrc + "/")
589 invalid.add(dsrc + "/")
594 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
590 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
595 # files from the same directory moved to two different places
591 # files from the same directory moved to two different places
596 invalid.add(dsrc + "/")
592 invalid.add(dsrc + "/")
597 else:
593 else:
598 # looks good so far
594 # looks good so far
599 dirmove[dsrc + "/"] = ddst + "/"
595 dirmove[dsrc + "/"] = ddst + "/"
600
596
601 for i in invalid:
597 for i in invalid:
602 if i in dirmove:
598 if i in dirmove:
603 del dirmove[i]
599 del dirmove[i]
604 del d1, d2, invalid
600 del d1, d2, invalid
605
601
606 if not dirmove:
602 if not dirmove:
607 return copy, {}, diverge, renamedelete, {}
603 return copy, {}, diverge, renamedelete, {}
608
604
609 for d in dirmove:
605 for d in dirmove:
610 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
606 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
611 (d, dirmove[d]))
607 (d, dirmove[d]))
612
608
613 movewithdir = {}
609 movewithdir = {}
614 # check unaccounted nonoverlapping files against directory moves
610 # check unaccounted nonoverlapping files against directory moves
615 for f in u1r + u2r:
611 for f in u1r + u2r:
616 if f not in fullcopy:
612 if f not in fullcopy:
617 for d in dirmove:
613 for d in dirmove:
618 if f.startswith(d):
614 if f.startswith(d):
619 # new file added in a directory that was moved, move it
615 # new file added in a directory that was moved, move it
620 df = dirmove[d] + f[len(d):]
616 df = dirmove[d] + f[len(d):]
621 if df not in copy:
617 if df not in copy:
622 movewithdir[f] = df
618 movewithdir[f] = df
623 repo.ui.debug((" pending file src: '%s' -> "
619 repo.ui.debug((" pending file src: '%s' -> "
624 "dst: '%s'\n") % (f, df))
620 "dst: '%s'\n") % (f, df))
625 break
621 break
626
622
627 return copy, movewithdir, diverge, renamedelete, dirmove
623 return copy, movewithdir, diverge, renamedelete, dirmove
628
624
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics
    """

    # working contexts have no usable rev; fall back to their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (exclusive) and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    # NOTE: materialize a list here (not a lazy filter object) so that the
    # truth test below behaves the same on Python 3; on Python 3
    # `filter(...)` is always truthy even when it yields nothing.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # which are the two rename shapes this heuristic recognizes
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # in case of a rebase/graft, base may not be a common ancestor
        anc = c1.ancestor(c2)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2, anc.rev()):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
716
712
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1  # a match

    ancit1 = f1.ancestors()
    ancit2 = f2.ancestors()
    try:
        rev1 = f1.linkrev()
        rev2 = f2.linkrev()

        # a workingfilectx has a linkrev of None; step past it onto the
        # first real ancestor before entering the comparison loop
        if rev1 is None:
            f1 = next(ancit1)
        if rev2 is None:
            f2 = next(ancit2)

        while True:
            rev1 = f1.linkrev()
            rev2 = f2.linkrev()
            if rev1 > rev2:
                # f1 is younger; walk it back toward f2
                f1 = next(ancit1)
                continue
            if rev2 > rev1:
                # f2 is younger; walk it back toward f1
                f2 = next(ancit2)
                continue
            # linkrevs are equal here: either a match, or unrelated
            if f1 == f2:
                return f1  # a match
            if rev1 == rev2 or rev1 < limit or rev2 < limit:
                return False  # copy no longer relevant
    except StopIteration:
        # ran out of ancestors on one side without meeting
        return False
750
746
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are recorded by mutating the 'copy', 'fullcopy', 'incomplete',
    'diverge' and 'incompletediverge' sub-dicts of ``data``; nothing is
    returned.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # names already visited along the rename chain, to skip re-processing
    seen = {f}
    # walk the ancestors of f on the source side, following renames
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                # record the divergence against the first seen name that
                # exists in the merge base
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                # rename chain crosses the topological CA: only half of the
                # copy is visible from this side, record it as incomplete
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
844
840
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    '''
    skipped = {}
    if skiprev is not None:
        # copytrace='off' skips only this pathcopies call, not the entire
        # function, because this call is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required for
        # carrying copy metadata across the rebase anyway).
        if repo.ui.config('experimental', 'copytrace') != 'off':
            skipped = pathcopies(repo[fromrev], repo[skiprev])
    allcopies = pathcopies(repo[fromrev], repo[rev])
    for dst, src in allcopies.iteritems():
        if dst in skipped:
            continue
        # pathcopies returns backward renames, so dst might not actually be
        # tracked in the dirstate; only record the copy when it is
        if repo.dirstate[dst] in "nma":
            repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now