##// END OF EJS Templates
copies: add docs for config `experimental.copytrace.sourcecommitlimit`...
Pulkit Goyal -
r34517:e79b3611 default
parent child Browse files
Show More
@@ -1,863 +1,867
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from . import (
14 from . import (
15 match as matchmod,
15 match as matchmod,
16 node,
16 node,
17 pathutil,
17 pathutil,
18 scmutil,
18 scmutil,
19 util,
19 util,
20 )
20 )
21
21
22 def _findlimit(repo, a, b):
22 def _findlimit(repo, a, b):
23 """
23 """
24 Find the last revision that needs to be checked to ensure that a full
24 Find the last revision that needs to be checked to ensure that a full
25 transitive closure for file copies can be properly calculated.
25 transitive closure for file copies can be properly calculated.
26 Generally, this means finding the earliest revision number that's an
26 Generally, this means finding the earliest revision number that's an
27 ancestor of a or b but not both, except when a or b is a direct descendent
27 ancestor of a or b but not both, except when a or b is a direct descendent
28 of the other, in which case we can return the minimum revnum of a and b.
28 of the other, in which case we can return the minimum revnum of a and b.
29 None if no such revision exists.
29 None if no such revision exists.
30 """
30 """
31
31
32 # basic idea:
32 # basic idea:
33 # - mark a and b with different sides
33 # - mark a and b with different sides
34 # - if a parent's children are all on the same side, the parent is
34 # - if a parent's children are all on the same side, the parent is
35 # on that side, otherwise it is on no side
35 # on that side, otherwise it is on no side
36 # - walk the graph in topological order with the help of a heap;
36 # - walk the graph in topological order with the help of a heap;
37 # - add unseen parents to side map
37 # - add unseen parents to side map
38 # - clear side of any parent that has children on different sides
38 # - clear side of any parent that has children on different sides
39 # - track number of interesting revs that might still be on a side
39 # - track number of interesting revs that might still be on a side
40 # - track the lowest interesting rev seen
40 # - track the lowest interesting rev seen
41 # - quit when interesting revs is zero
41 # - quit when interesting revs is zero
42
42
43 cl = repo.changelog
43 cl = repo.changelog
44 working = len(cl) # pseudo rev for the working directory
44 working = len(cl) # pseudo rev for the working directory
45 if a is None:
45 if a is None:
46 a = working
46 a = working
47 if b is None:
47 if b is None:
48 b = working
48 b = working
49
49
50 side = {a: -1, b: 1}
50 side = {a: -1, b: 1}
51 visit = [-a, -b]
51 visit = [-a, -b]
52 heapq.heapify(visit)
52 heapq.heapify(visit)
53 interesting = len(visit)
53 interesting = len(visit)
54 hascommonancestor = False
54 hascommonancestor = False
55 limit = working
55 limit = working
56
56
57 while interesting:
57 while interesting:
58 r = -heapq.heappop(visit)
58 r = -heapq.heappop(visit)
59 if r == working:
59 if r == working:
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 parents = [cl.rev(p) for p in repo.dirstate.parents()]
61 else:
61 else:
62 parents = cl.parentrevs(r)
62 parents = cl.parentrevs(r)
63 for p in parents:
63 for p in parents:
64 if p < 0:
64 if p < 0:
65 continue
65 continue
66 if p not in side:
66 if p not in side:
67 # first time we see p; add it to visit
67 # first time we see p; add it to visit
68 side[p] = side[r]
68 side[p] = side[r]
69 if side[p]:
69 if side[p]:
70 interesting += 1
70 interesting += 1
71 heapq.heappush(visit, -p)
71 heapq.heappush(visit, -p)
72 elif side[p] and side[p] != side[r]:
72 elif side[p] and side[p] != side[r]:
73 # p was interesting but now we know better
73 # p was interesting but now we know better
74 side[p] = 0
74 side[p] = 0
75 interesting -= 1
75 interesting -= 1
76 hascommonancestor = True
76 hascommonancestor = True
77 if side[r]:
77 if side[r]:
78 limit = r # lowest rev visited
78 limit = r # lowest rev visited
79 interesting -= 1
79 interesting -= 1
80
80
81 if not hascommonancestor:
81 if not hascommonancestor:
82 return None
82 return None
83
83
84 # Consider the following flow (see test-commit-amend.t under issue4405):
84 # Consider the following flow (see test-commit-amend.t under issue4405):
85 # 1/ File 'a0' committed
85 # 1/ File 'a0' committed
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
87 # 3/ Move back to first commit
87 # 3/ Move back to first commit
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
90 #
90 #
91 # During the amend in step five, we will be in this state:
91 # During the amend in step five, we will be in this state:
92 #
92 #
93 # @ 3 temporary amend commit for a1-amend
93 # @ 3 temporary amend commit for a1-amend
94 # |
94 # |
95 # o 2 a1-amend
95 # o 2 a1-amend
96 # |
96 # |
97 # | o 1 a1
97 # | o 1 a1
98 # |/
98 # |/
99 # o 0 a0
99 # o 0 a0
100 #
100 #
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
102 # yet the filelog has the copy information in rev 1 and we will not look
102 # yet the filelog has the copy information in rev 1 and we will not look
103 # back far enough unless we also look at the a and b as candidates.
103 # back far enough unless we also look at the a and b as candidates.
104 # This only occurs when a is a descendent of b or visa-versa.
104 # This only occurs when a is a descendent of b or visa-versa.
105 return min(limit, a, b)
105 return min(limit, a, b)
106
106
107 def _chain(src, dst, a, b):
107 def _chain(src, dst, a, b):
108 '''chain two sets of copies a->b'''
108 '''chain two sets of copies a->b'''
109 t = a.copy()
109 t = a.copy()
110 for k, v in b.iteritems():
110 for k, v in b.iteritems():
111 if v in t:
111 if v in t:
112 # found a chain
112 # found a chain
113 if t[v] != k:
113 if t[v] != k:
114 # file wasn't renamed back to itself
114 # file wasn't renamed back to itself
115 t[k] = t[v]
115 t[k] = t[v]
116 if v not in dst:
116 if v not in dst:
117 # chain was a rename, not a copy
117 # chain was a rename, not a copy
118 del t[v]
118 del t[v]
119 if v in src:
119 if v in src:
120 # file is a copy of an existing file
120 # file is a copy of an existing file
121 t[k] = v
121 t[k] = v
122
122
123 # remove criss-crossed copies
123 # remove criss-crossed copies
124 for k, v in t.items():
124 for k, v in t.items():
125 if k in src and v in dst:
125 if k in src and v in dst:
126 del t[k]
126 del t[k]
127
127
128 return t
128 return t
129
129
130 def _tracefile(fctx, am, limit=-1):
130 def _tracefile(fctx, am, limit=-1):
131 '''return file context that is the ancestor of fctx present in ancestor
131 '''return file context that is the ancestor of fctx present in ancestor
132 manifest am, stopping after the first ancestor lower than limit'''
132 manifest am, stopping after the first ancestor lower than limit'''
133
133
134 for f in fctx.ancestors():
134 for f in fctx.ancestors():
135 if am.get(f.path(), None) == f.filenode():
135 if am.get(f.path(), None) == f.filenode():
136 return f
136 return f
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
138 return None
138 return None
139
139
140 def _dirstatecopies(d):
140 def _dirstatecopies(d):
141 ds = d._repo.dirstate
141 ds = d._repo.dirstate
142 c = ds.copies().copy()
142 c = ds.copies().copy()
143 for k in list(c):
143 for k in list(c):
144 if ds[k] not in 'anm':
144 if ds[k] not in 'anm':
145 del c[k]
145 del c[k]
146 return c
146 return c
147
147
148 def _computeforwardmissing(a, b, match=None):
148 def _computeforwardmissing(a, b, match=None):
149 """Computes which files are in b but not a.
149 """Computes which files are in b but not a.
150 This is its own function so extensions can easily wrap this call to see what
150 This is its own function so extensions can easily wrap this call to see what
151 files _forwardcopies is about to process.
151 files _forwardcopies is about to process.
152 """
152 """
153 ma = a.manifest()
153 ma = a.manifest()
154 mb = b.manifest()
154 mb = b.manifest()
155 return mb.filesnotin(ma, match=match)
155 return mb.filesnotin(ma, match=match)
156
156
157 def _forwardcopies(a, b, match=None):
157 def _forwardcopies(a, b, match=None):
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159
159
160 # check for working copy
160 # check for working copy
161 w = None
161 w = None
162 if b.rev() is None:
162 if b.rev() is None:
163 w = b
163 w = b
164 b = w.p1()
164 b = w.p1()
165 if a == b:
165 if a == b:
166 # short-circuit to avoid issues with merge states
166 # short-circuit to avoid issues with merge states
167 return _dirstatecopies(w)
167 return _dirstatecopies(w)
168
168
169 # files might have to be traced back to the fctx parent of the last
169 # files might have to be traced back to the fctx parent of the last
170 # one-side-only changeset, but not further back than that
170 # one-side-only changeset, but not further back than that
171 limit = _findlimit(a._repo, a.rev(), b.rev())
171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 if limit is None:
172 if limit is None:
173 limit = -1
173 limit = -1
174 am = a.manifest()
174 am = a.manifest()
175
175
176 # find where new files came from
176 # find where new files came from
177 # we currently don't try to find where old files went, too expensive
177 # we currently don't try to find where old files went, too expensive
178 # this means we can miss a case like 'hg rm b; hg cp a b'
178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 cm = {}
179 cm = {}
180
180
181 # Computing the forward missing is quite expensive on large manifests, since
181 # Computing the forward missing is quite expensive on large manifests, since
182 # it compares the entire manifests. We can optimize it in the common use
182 # it compares the entire manifests. We can optimize it in the common use
183 # case of computing what copies are in a commit versus its parent (like
183 # case of computing what copies are in a commit versus its parent (like
184 # during a rebase or histedit). Note, we exclude merge commits from this
184 # during a rebase or histedit). Note, we exclude merge commits from this
185 # optimization, since the ctx.files() for a merge commit is not correct for
185 # optimization, since the ctx.files() for a merge commit is not correct for
186 # this comparison.
186 # this comparison.
187 forwardmissingmatch = match
187 forwardmissingmatch = match
188 if b.p1() == a and b.p2().node() == node.nullid:
188 if b.p1() == a and b.p2().node() == node.nullid:
189 filesmatcher = scmutil.matchfiles(a._repo, b.files())
189 filesmatcher = scmutil.matchfiles(a._repo, b.files())
190 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
190 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
191 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
192
192
193 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
194 for f in missing:
194 for f in missing:
195 fctx = b[f]
195 fctx = b[f]
196 fctx._ancestrycontext = ancestrycontext
196 fctx._ancestrycontext = ancestrycontext
197 ofctx = _tracefile(fctx, am, limit)
197 ofctx = _tracefile(fctx, am, limit)
198 if ofctx:
198 if ofctx:
199 cm[f] = ofctx.path()
199 cm[f] = ofctx.path()
200
200
201 # combine copies from dirstate if necessary
201 # combine copies from dirstate if necessary
202 if w is not None:
202 if w is not None:
203 cm = _chain(a, w, cm, _dirstatecopies(w))
203 cm = _chain(a, w, cm, _dirstatecopies(w))
204
204
205 return cm
205 return cm
206
206
207 def _backwardrenames(a, b):
207 def _backwardrenames(a, b):
208 if a._repo.ui.config('experimental', 'copytrace') == 'off':
208 if a._repo.ui.config('experimental', 'copytrace') == 'off':
209 return {}
209 return {}
210
210
211 # Even though we're not taking copies into account, 1:n rename situations
211 # Even though we're not taking copies into account, 1:n rename situations
212 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
213 # arbitrarily pick one of the renames.
213 # arbitrarily pick one of the renames.
214 f = _forwardcopies(b, a)
214 f = _forwardcopies(b, a)
215 r = {}
215 r = {}
216 for k, v in sorted(f.iteritems()):
216 for k, v in sorted(f.iteritems()):
217 # remove copies
217 # remove copies
218 if v in a:
218 if v in a:
219 continue
219 continue
220 r[v] = k
220 r[v] = k
221 return r
221 return r
222
222
223 def pathcopies(x, y, match=None):
223 def pathcopies(x, y, match=None):
224 '''find {dst@y: src@x} copy mapping for directed compare'''
224 '''find {dst@y: src@x} copy mapping for directed compare'''
225 if x == y or not x or not y:
225 if x == y or not x or not y:
226 return {}
226 return {}
227 a = y.ancestor(x)
227 a = y.ancestor(x)
228 if a == x:
228 if a == x:
229 return _forwardcopies(x, y, match=match)
229 return _forwardcopies(x, y, match=match)
230 if a == y:
230 if a == y:
231 return _backwardrenames(x, y)
231 return _backwardrenames(x, y)
232 return _chain(x, y, _backwardrenames(x, a),
232 return _chain(x, y, _backwardrenames(x, a),
233 _forwardcopies(a, y, match=match))
233 _forwardcopies(a, y, match=match))
234
234
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
237 and c2. This is its own function so extensions can easily wrap this call
237 and c2. This is its own function so extensions can easily wrap this call
238 to see what files mergecopies is about to process.
238 to see what files mergecopies is about to process.
239
239
240 Even though c1 and c2 are not used in this function, they are useful in
240 Even though c1 and c2 are not used in this function, they are useful in
241 other extensions for being able to read the file nodes of the changed files.
241 other extensions for being able to read the file nodes of the changed files.
242
242
243 "baselabel" can be passed to help distinguish the multiple computations
243 "baselabel" can be passed to help distinguish the multiple computations
244 done in the graft case.
244 done in the graft case.
245 """
245 """
246 u1 = sorted(addedinm1 - addedinm2)
246 u1 = sorted(addedinm1 - addedinm2)
247 u2 = sorted(addedinm2 - addedinm1)
247 u2 = sorted(addedinm2 - addedinm1)
248
248
249 header = " unmatched files in %s"
249 header = " unmatched files in %s"
250 if baselabel:
250 if baselabel:
251 header += ' (from %s)' % baselabel
251 header += ' (from %s)' % baselabel
252 if u1:
252 if u1:
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
254 if u2:
254 if u2:
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
256 return u1, u2
256 return u1, u2
257
257
258 def _makegetfctx(ctx):
258 def _makegetfctx(ctx):
259 """return a 'getfctx' function suitable for _checkcopies usage
259 """return a 'getfctx' function suitable for _checkcopies usage
260
260
261 We have to re-setup the function building 'filectx' for each
261 We have to re-setup the function building 'filectx' for each
262 '_checkcopies' to ensure the linkrev adjustment is properly setup for
262 '_checkcopies' to ensure the linkrev adjustment is properly setup for
263 each. Linkrev adjustment is important to avoid bug in rename
263 each. Linkrev adjustment is important to avoid bug in rename
264 detection. Moreover, having a proper '_ancestrycontext' setup ensures
264 detection. Moreover, having a proper '_ancestrycontext' setup ensures
265 the performance impact of this adjustment is kept limited. Without it,
265 the performance impact of this adjustment is kept limited. Without it,
266 each file could do a full dag traversal making the time complexity of
266 each file could do a full dag traversal making the time complexity of
267 the operation explode (see issue4537).
267 the operation explode (see issue4537).
268
268
269 This function exists here mostly to limit the impact on stable. Feel
269 This function exists here mostly to limit the impact on stable. Feel
270 free to refactor on default.
270 free to refactor on default.
271 """
271 """
272 rev = ctx.rev()
272 rev = ctx.rev()
273 repo = ctx._repo
273 repo = ctx._repo
274 ac = getattr(ctx, '_ancestrycontext', None)
274 ac = getattr(ctx, '_ancestrycontext', None)
275 if ac is None:
275 if ac is None:
276 revs = [rev]
276 revs = [rev]
277 if rev is None:
277 if rev is None:
278 revs = [p.rev() for p in ctx.parents()]
278 revs = [p.rev() for p in ctx.parents()]
279 ac = repo.changelog.ancestors(revs, inclusive=True)
279 ac = repo.changelog.ancestors(revs, inclusive=True)
280 ctx._ancestrycontext = ac
280 ctx._ancestrycontext = ac
281 def makectx(f, n):
281 def makectx(f, n):
282 if n in node.wdirnodes: # in a working context?
282 if n in node.wdirnodes: # in a working context?
283 if ctx.rev() is None:
283 if ctx.rev() is None:
284 return ctx.filectx(f)
284 return ctx.filectx(f)
285 return repo[None][f]
285 return repo[None][f]
286 fctx = repo.filectx(f, fileid=n)
286 fctx = repo.filectx(f, fileid=n)
287 # setup only needed for filectx not create from a changectx
287 # setup only needed for filectx not create from a changectx
288 fctx._ancestrycontext = ac
288 fctx._ancestrycontext = ac
289 fctx._descendantrev = rev
289 fctx._descendantrev = rev
290 return fctx
290 return fctx
291 return util.lrucachefunc(makectx)
291 return util.lrucachefunc(makectx)
292
292
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
294 """combine partial copy paths"""
294 """combine partial copy paths"""
295 remainder = {}
295 remainder = {}
296 for f in copyfrom:
296 for f in copyfrom:
297 if f in copyto:
297 if f in copyto:
298 finalcopy[copyto[f]] = copyfrom[f]
298 finalcopy[copyto[f]] = copyfrom[f]
299 del copyto[f]
299 del copyto[f]
300 for f in incompletediverge:
300 for f in incompletediverge:
301 assert f not in diverge
301 assert f not in diverge
302 ic = incompletediverge[f]
302 ic = incompletediverge[f]
303 if ic[0] in copyto:
303 if ic[0] in copyto:
304 diverge[f] = [copyto[ic[0]], ic[1]]
304 diverge[f] = [copyto[ic[0]], ic[1]]
305 else:
305 else:
306 remainder[f] = ic
306 remainder[f] = ic
307 return remainder
307 return remainder
308
308
309 def mergecopies(repo, c1, c2, base):
309 def mergecopies(repo, c1, c2, base):
310 """
310 """
311 The function calling different copytracing algorithms on the basis of config
311 The function calling different copytracing algorithms on the basis of config
312 which find moves and copies between context c1 and c2 that are relevant for
312 which find moves and copies between context c1 and c2 that are relevant for
313 merging. 'base' will be used as the merge base.
313 merging. 'base' will be used as the merge base.
314
314
315 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
315 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
316 files that were moved/ copied in one merge parent and modified in another.
316 files that were moved/ copied in one merge parent and modified in another.
317 For example:
317 For example:
318
318
319 o ---> 4 another commit
319 o ---> 4 another commit
320 |
320 |
321 | o ---> 3 commit that modifies a.txt
321 | o ---> 3 commit that modifies a.txt
322 | /
322 | /
323 o / ---> 2 commit that moves a.txt to b.txt
323 o / ---> 2 commit that moves a.txt to b.txt
324 |/
324 |/
325 o ---> 1 merge base
325 o ---> 1 merge base
326
326
327 If we try to rebase revision 3 on revision 4, since there is no a.txt in
327 If we try to rebase revision 3 on revision 4, since there is no a.txt in
328 revision 4, and if user have copytrace disabled, we prints the following
328 revision 4, and if user have copytrace disabled, we prints the following
329 message:
329 message:
330
330
331 ```other changed <file> which local deleted```
331 ```other changed <file> which local deleted```
332
332
333 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
333 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
334 "dirmove".
334 "dirmove".
335
335
336 "copy" is a mapping from destination name -> source name,
336 "copy" is a mapping from destination name -> source name,
337 where source is in c1 and destination is in c2 or vice-versa.
337 where source is in c1 and destination is in c2 or vice-versa.
338
338
339 "movewithdir" is a mapping from source name -> destination name,
339 "movewithdir" is a mapping from source name -> destination name,
340 where the file at source present in one context but not the other
340 where the file at source present in one context but not the other
341 needs to be moved to destination by the merge process, because the
341 needs to be moved to destination by the merge process, because the
342 other context moved the directory it is in.
342 other context moved the directory it is in.
343
343
344 "diverge" is a mapping of source name -> list of destination names
344 "diverge" is a mapping of source name -> list of destination names
345 for divergent renames.
345 for divergent renames.
346
346
347 "renamedelete" is a mapping of source name -> list of destination
347 "renamedelete" is a mapping of source name -> list of destination
348 names for files deleted in c1 that were renamed in c2 or vice-versa.
348 names for files deleted in c1 that were renamed in c2 or vice-versa.
349
349
350 "dirmove" is a mapping of detected source dir -> destination dir renames.
350 "dirmove" is a mapping of detected source dir -> destination dir renames.
351 This is needed for handling changes to new files previously grafted into
351 This is needed for handling changes to new files previously grafted into
352 renamed directories.
352 renamed directories.
353 """
353 """
354 # avoid silly behavior for update from empty dir
354 # avoid silly behavior for update from empty dir
355 if not c1 or not c2 or c1 == c2:
355 if not c1 or not c2 or c1 == c2:
356 return {}, {}, {}, {}, {}
356 return {}, {}, {}, {}, {}
357
357
358 # avoid silly behavior for parent -> working dir
358 # avoid silly behavior for parent -> working dir
359 if c2.node() is None and c1.node() == repo.dirstate.p1():
359 if c2.node() is None and c1.node() == repo.dirstate.p1():
360 return repo.dirstate.copies(), {}, {}, {}, {}
360 return repo.dirstate.copies(), {}, {}, {}, {}
361
361
362 copytracing = repo.ui.config('experimental', 'copytrace')
362 copytracing = repo.ui.config('experimental', 'copytrace')
363
363
364 # Copy trace disabling is explicitly below the node == p1 logic above
364 # Copy trace disabling is explicitly below the node == p1 logic above
365 # because the logic above is required for a simple copy to be kept across a
365 # because the logic above is required for a simple copy to be kept across a
366 # rebase.
366 # rebase.
367 if copytracing == 'off':
367 if copytracing == 'off':
368 return {}, {}, {}, {}, {}
368 return {}, {}, {}, {}, {}
369 elif copytracing == 'heuristics':
369 elif copytracing == 'heuristics':
370 # Do full copytracing if only non-public revisions are involved as
370 # Do full copytracing if only non-public revisions are involved as
371 # that will be fast enough and will also cover the copies which could
371 # that will be fast enough and will also cover the copies which could
372 # be missed by heuristics
372 # be missed by heuristics
373 if _isfullcopytraceable(repo, c1, base):
373 if _isfullcopytraceable(repo, c1, base):
374 return _fullcopytracing(repo, c1, c2, base)
374 return _fullcopytracing(repo, c1, c2, base)
375 return _heuristicscopytracing(repo, c1, c2, base)
375 return _heuristicscopytracing(repo, c1, c2, base)
376 else:
376 else:
377 return _fullcopytracing(repo, c1, c2, base)
377 return _fullcopytracing(repo, c1, c2, base)
378
378
379 def _isfullcopytraceable(repo, c1, base):
379 def _isfullcopytraceable(repo, c1, base):
380 """ Checks that if base, source and destination are all no-public branches,
380 """ Checks that if base, source and destination are all no-public branches,
381 if yes let's use the full copytrace algorithm for increased capabilities
381 if yes let's use the full copytrace algorithm for increased capabilities
382 since it will be fast enough.
382 since it will be fast enough.
383
384 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
385 number of changesets from c1 to base such that if number of changesets are
386 more than the limit, full copytracing algorithm won't be used.
383 """
387 """
384 if c1.rev() is None:
388 if c1.rev() is None:
385 c1 = c1.p1()
389 c1 = c1.p1()
386 if c1.mutable() and base.mutable():
390 if c1.mutable() and base.mutable():
387 sourcecommitlimit = repo.ui.configint('experimental',
391 sourcecommitlimit = repo.ui.configint('experimental',
388 'copytrace.sourcecommitlimit')
392 'copytrace.sourcecommitlimit')
389 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
393 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
390 return commits < sourcecommitlimit
394 return commits < sourcecommitlimit
391 return False
395 return False
392
396
393 def _fullcopytracing(repo, c1, c2, base):
397 def _fullcopytracing(repo, c1, c2, base):
394 """ The full copytracing algorithm which finds all the new files that were
398 """ The full copytracing algorithm which finds all the new files that were
395 added from merge base up to the top commit and for each file it checks if
399 added from merge base up to the top commit and for each file it checks if
396 this file was copied from another file.
400 this file was copied from another file.
397
401
398 This is pretty slow when a lot of changesets are involved but will track all
402 This is pretty slow when a lot of changesets are involved but will track all
399 the copies.
403 the copies.
400 """
404 """
401 # In certain scenarios (e.g. graft, update or rebase), base can be
405 # In certain scenarios (e.g. graft, update or rebase), base can be
402 # overridden We still need to know a real common ancestor in this case We
406 # overridden We still need to know a real common ancestor in this case We
403 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
407 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
404 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
408 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
405 # caller may not know if the revision passed in lieu of the CA is a genuine
409 # caller may not know if the revision passed in lieu of the CA is a genuine
406 # common ancestor or not without explicitly checking it, it's better to
410 # common ancestor or not without explicitly checking it, it's better to
407 # determine that here.
411 # determine that here.
408 #
412 #
409 # base.descendant(wc) and base.descendant(base) are False, work around that
413 # base.descendant(wc) and base.descendant(base) are False, work around that
410 _c1 = c1.p1() if c1.rev() is None else c1
414 _c1 = c1.p1() if c1.rev() is None else c1
411 _c2 = c2.p1() if c2.rev() is None else c2
415 _c2 = c2.p1() if c2.rev() is None else c2
412 # an endpoint is "dirty" if it isn't a descendant of the merge base
416 # an endpoint is "dirty" if it isn't a descendant of the merge base
413 # if we have a dirty endpoint, we need to trigger graft logic, and also
417 # if we have a dirty endpoint, we need to trigger graft logic, and also
414 # keep track of which endpoint is dirty
418 # keep track of which endpoint is dirty
415 dirtyc1 = not (base == _c1 or base.descendant(_c1))
419 dirtyc1 = not (base == _c1 or base.descendant(_c1))
416 dirtyc2 = not (base == _c2 or base.descendant(_c2))
420 dirtyc2 = not (base == _c2 or base.descendant(_c2))
417 graft = dirtyc1 or dirtyc2
421 graft = dirtyc1 or dirtyc2
418 tca = base
422 tca = base
419 if graft:
423 if graft:
420 tca = _c1.ancestor(_c2)
424 tca = _c1.ancestor(_c2)
421
425
422 limit = _findlimit(repo, c1.rev(), c2.rev())
426 limit = _findlimit(repo, c1.rev(), c2.rev())
423 if limit is None:
427 if limit is None:
424 # no common ancestor, no copies
428 # no common ancestor, no copies
425 return {}, {}, {}, {}, {}
429 return {}, {}, {}, {}, {}
426 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
430 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
427
431
428 m1 = c1.manifest()
432 m1 = c1.manifest()
429 m2 = c2.manifest()
433 m2 = c2.manifest()
430 mb = base.manifest()
434 mb = base.manifest()
431
435
432 # gather data from _checkcopies:
436 # gather data from _checkcopies:
433 # - diverge = record all diverges in this dict
437 # - diverge = record all diverges in this dict
434 # - copy = record all non-divergent copies in this dict
438 # - copy = record all non-divergent copies in this dict
435 # - fullcopy = record all copies in this dict
439 # - fullcopy = record all copies in this dict
436 # - incomplete = record non-divergent partial copies here
440 # - incomplete = record non-divergent partial copies here
437 # - incompletediverge = record divergent partial copies here
441 # - incompletediverge = record divergent partial copies here
438 diverge = {} # divergence data is shared
442 diverge = {} # divergence data is shared
439 incompletediverge = {}
443 incompletediverge = {}
440 data1 = {'copy': {},
444 data1 = {'copy': {},
441 'fullcopy': {},
445 'fullcopy': {},
442 'incomplete': {},
446 'incomplete': {},
443 'diverge': diverge,
447 'diverge': diverge,
444 'incompletediverge': incompletediverge,
448 'incompletediverge': incompletediverge,
445 }
449 }
446 data2 = {'copy': {},
450 data2 = {'copy': {},
447 'fullcopy': {},
451 'fullcopy': {},
448 'incomplete': {},
452 'incomplete': {},
449 'diverge': diverge,
453 'diverge': diverge,
450 'incompletediverge': incompletediverge,
454 'incompletediverge': incompletediverge,
451 }
455 }
452
456
453 # find interesting file sets from manifests
457 # find interesting file sets from manifests
454 addedinm1 = m1.filesnotin(mb)
458 addedinm1 = m1.filesnotin(mb)
455 addedinm2 = m2.filesnotin(mb)
459 addedinm2 = m2.filesnotin(mb)
456 bothnew = sorted(addedinm1 & addedinm2)
460 bothnew = sorted(addedinm1 & addedinm2)
457 if tca == base:
461 if tca == base:
458 # unmatched file from base
462 # unmatched file from base
459 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
460 u1u, u2u = u1r, u2r
464 u1u, u2u = u1r, u2r
461 else:
465 else:
462 # unmatched file from base (DAG rotation in the graft case)
466 # unmatched file from base (DAG rotation in the graft case)
463 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
467 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
464 baselabel='base')
468 baselabel='base')
465 # unmatched file from topological common ancestors (no DAG rotation)
469 # unmatched file from topological common ancestors (no DAG rotation)
466 # need to recompute this for directory move handling when grafting
470 # need to recompute this for directory move handling when grafting
467 mta = tca.manifest()
471 mta = tca.manifest()
468 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
472 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
469 m2.filesnotin(mta),
473 m2.filesnotin(mta),
470 baselabel='topological common ancestor')
474 baselabel='topological common ancestor')
471
475
472 for f in u1u:
476 for f in u1u:
473 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
477 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
474
478
475 for f in u2u:
479 for f in u2u:
476 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
480 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
477
481
478 copy = dict(data1['copy'])
482 copy = dict(data1['copy'])
479 copy.update(data2['copy'])
483 copy.update(data2['copy'])
480 fullcopy = dict(data1['fullcopy'])
484 fullcopy = dict(data1['fullcopy'])
481 fullcopy.update(data2['fullcopy'])
485 fullcopy.update(data2['fullcopy'])
482
486
483 if dirtyc1:
487 if dirtyc1:
484 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
488 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
485 incompletediverge)
489 incompletediverge)
486 else:
490 else:
487 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
491 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
488 incompletediverge)
492 incompletediverge)
489
493
490 renamedelete = {}
494 renamedelete = {}
491 renamedeleteset = set()
495 renamedeleteset = set()
492 divergeset = set()
496 divergeset = set()
493 for of, fl in list(diverge.items()):
497 for of, fl in list(diverge.items()):
494 if len(fl) == 1 or of in c1 or of in c2:
498 if len(fl) == 1 or of in c1 or of in c2:
495 del diverge[of] # not actually divergent, or not a rename
499 del diverge[of] # not actually divergent, or not a rename
496 if of not in c1 and of not in c2:
500 if of not in c1 and of not in c2:
497 # renamed on one side, deleted on the other side, but filter
501 # renamed on one side, deleted on the other side, but filter
498 # out files that have been renamed and then deleted
502 # out files that have been renamed and then deleted
499 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
503 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
500 renamedeleteset.update(fl) # reverse map for below
504 renamedeleteset.update(fl) # reverse map for below
501 else:
505 else:
502 divergeset.update(fl) # reverse map for below
506 divergeset.update(fl) # reverse map for below
503
507
504 if bothnew:
508 if bothnew:
505 repo.ui.debug(" unmatched files new in both:\n %s\n"
509 repo.ui.debug(" unmatched files new in both:\n %s\n"
506 % "\n ".join(bothnew))
510 % "\n ".join(bothnew))
507 bothdiverge = {}
511 bothdiverge = {}
508 bothincompletediverge = {}
512 bothincompletediverge = {}
509 remainder = {}
513 remainder = {}
510 both1 = {'copy': {},
514 both1 = {'copy': {},
511 'fullcopy': {},
515 'fullcopy': {},
512 'incomplete': {},
516 'incomplete': {},
513 'diverge': bothdiverge,
517 'diverge': bothdiverge,
514 'incompletediverge': bothincompletediverge
518 'incompletediverge': bothincompletediverge
515 }
519 }
516 both2 = {'copy': {},
520 both2 = {'copy': {},
517 'fullcopy': {},
521 'fullcopy': {},
518 'incomplete': {},
522 'incomplete': {},
519 'diverge': bothdiverge,
523 'diverge': bothdiverge,
520 'incompletediverge': bothincompletediverge
524 'incompletediverge': bothincompletediverge
521 }
525 }
522 for f in bothnew:
526 for f in bothnew:
523 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
527 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
524 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
528 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
525 if dirtyc1:
529 if dirtyc1:
526 # incomplete copies may only be found on the "dirty" side for bothnew
530 # incomplete copies may only be found on the "dirty" side for bothnew
527 assert not both2['incomplete']
531 assert not both2['incomplete']
528 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
532 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
529 bothincompletediverge)
533 bothincompletediverge)
530 elif dirtyc2:
534 elif dirtyc2:
531 assert not both1['incomplete']
535 assert not both1['incomplete']
532 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
536 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
533 bothincompletediverge)
537 bothincompletediverge)
534 else:
538 else:
535 # incomplete copies and divergences can't happen outside grafts
539 # incomplete copies and divergences can't happen outside grafts
536 assert not both1['incomplete']
540 assert not both1['incomplete']
537 assert not both2['incomplete']
541 assert not both2['incomplete']
538 assert not bothincompletediverge
542 assert not bothincompletediverge
539 for f in remainder:
543 for f in remainder:
540 assert f not in bothdiverge
544 assert f not in bothdiverge
541 ic = remainder[f]
545 ic = remainder[f]
542 if ic[0] in (m1 if dirtyc1 else m2):
546 if ic[0] in (m1 if dirtyc1 else m2):
543 # backed-out rename on one side, but watch out for deleted files
547 # backed-out rename on one side, but watch out for deleted files
544 bothdiverge[f] = ic
548 bothdiverge[f] = ic
545 for of, fl in bothdiverge.items():
549 for of, fl in bothdiverge.items():
546 if len(fl) == 2 and fl[0] == fl[1]:
550 if len(fl) == 2 and fl[0] == fl[1]:
547 copy[fl[0]] = of # not actually divergent, just matching renames
551 copy[fl[0]] = of # not actually divergent, just matching renames
548
552
549 if fullcopy and repo.ui.debugflag:
553 if fullcopy and repo.ui.debugflag:
550 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
554 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
551 "% = renamed and deleted):\n")
555 "% = renamed and deleted):\n")
552 for f in sorted(fullcopy):
556 for f in sorted(fullcopy):
553 note = ""
557 note = ""
554 if f in copy:
558 if f in copy:
555 note += "*"
559 note += "*"
556 if f in divergeset:
560 if f in divergeset:
557 note += "!"
561 note += "!"
558 if f in renamedeleteset:
562 if f in renamedeleteset:
559 note += "%"
563 note += "%"
560 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
564 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
561 note))
565 note))
562 del divergeset
566 del divergeset
563
567
564 if not fullcopy:
568 if not fullcopy:
565 return copy, {}, diverge, renamedelete, {}
569 return copy, {}, diverge, renamedelete, {}
566
570
567 repo.ui.debug(" checking for directory renames\n")
571 repo.ui.debug(" checking for directory renames\n")
568
572
569 # generate a directory move map
573 # generate a directory move map
570 d1, d2 = c1.dirs(), c2.dirs()
574 d1, d2 = c1.dirs(), c2.dirs()
571 # Hack for adding '', which is not otherwise added, to d1 and d2
575 # Hack for adding '', which is not otherwise added, to d1 and d2
572 d1.addpath('/')
576 d1.addpath('/')
573 d2.addpath('/')
577 d2.addpath('/')
574 invalid = set()
578 invalid = set()
575 dirmove = {}
579 dirmove = {}
576
580
577 # examine each file copy for a potential directory move, which is
581 # examine each file copy for a potential directory move, which is
578 # when all the files in a directory are moved to a new directory
582 # when all the files in a directory are moved to a new directory
579 for dst, src in fullcopy.iteritems():
583 for dst, src in fullcopy.iteritems():
580 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
584 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
581 if dsrc in invalid:
585 if dsrc in invalid:
582 # already seen to be uninteresting
586 # already seen to be uninteresting
583 continue
587 continue
584 elif dsrc in d1 and ddst in d1:
588 elif dsrc in d1 and ddst in d1:
585 # directory wasn't entirely moved locally
589 # directory wasn't entirely moved locally
586 invalid.add(dsrc + "/")
590 invalid.add(dsrc + "/")
587 elif dsrc in d2 and ddst in d2:
591 elif dsrc in d2 and ddst in d2:
588 # directory wasn't entirely moved remotely
592 # directory wasn't entirely moved remotely
589 invalid.add(dsrc + "/")
593 invalid.add(dsrc + "/")
590 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
594 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
591 # files from the same directory moved to two different places
595 # files from the same directory moved to two different places
592 invalid.add(dsrc + "/")
596 invalid.add(dsrc + "/")
593 else:
597 else:
594 # looks good so far
598 # looks good so far
595 dirmove[dsrc + "/"] = ddst + "/"
599 dirmove[dsrc + "/"] = ddst + "/"
596
600
597 for i in invalid:
601 for i in invalid:
598 if i in dirmove:
602 if i in dirmove:
599 del dirmove[i]
603 del dirmove[i]
600 del d1, d2, invalid
604 del d1, d2, invalid
601
605
602 if not dirmove:
606 if not dirmove:
603 return copy, {}, diverge, renamedelete, {}
607 return copy, {}, diverge, renamedelete, {}
604
608
605 for d in dirmove:
609 for d in dirmove:
606 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
610 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
607 (d, dirmove[d]))
611 (d, dirmove[d]))
608
612
609 movewithdir = {}
613 movewithdir = {}
610 # check unaccounted nonoverlapping files against directory moves
614 # check unaccounted nonoverlapping files against directory moves
611 for f in u1r + u2r:
615 for f in u1r + u2r:
612 if f not in fullcopy:
616 if f not in fullcopy:
613 for d in dirmove:
617 for d in dirmove:
614 if f.startswith(d):
618 if f.startswith(d):
615 # new file added in a directory that was moved, move it
619 # new file added in a directory that was moved, move it
616 df = dirmove[d] + f[len(d):]
620 df = dirmove[d] + f[len(d):]
617 if df not in copy:
621 if df not in copy:
618 movewithdir[f] = df
622 movewithdir[f] = df
619 repo.ui.debug((" pending file src: '%s' -> "
623 repo.ui.debug((" pending file src: '%s' -> "
620 "dst: '%s'\n") % (f, df))
624 "dst: '%s'\n") % (f, df))
621 break
625 break
622
626
623 return copy, movewithdir, diverge, renamedelete, dirmove
627 return copy, movewithdir, diverge, renamedelete, dirmove
624
628
625 def _heuristicscopytracing(repo, c1, c2, base):
629 def _heuristicscopytracing(repo, c1, c2, base):
626 """ Fast copytracing using filename heuristics
630 """ Fast copytracing using filename heuristics
627
631
628 Assumes that moves or renames are of following two types:
632 Assumes that moves or renames are of following two types:
629
633
630 1) Inside a directory only (same directory name but different filenames)
634 1) Inside a directory only (same directory name but different filenames)
631 2) Move from one directory to another
635 2) Move from one directory to another
632 (same filenames but different directory names)
636 (same filenames but different directory names)
633
637
634 Works only when there are no merge commits in the "source branch".
638 Works only when there are no merge commits in the "source branch".
635 Source branch is commits from base up to c2 not including base.
639 Source branch is commits from base up to c2 not including base.
636
640
637 If merge is involved it fallbacks to _fullcopytracing().
641 If merge is involved it fallbacks to _fullcopytracing().
638
642
639 Can be used by setting the following config:
643 Can be used by setting the following config:
640
644
641 [experimental]
645 [experimental]
642 copytrace = heuristics
646 copytrace = heuristics
643 """
647 """
644
648
645 if c1.rev() is None:
649 if c1.rev() is None:
646 c1 = c1.p1()
650 c1 = c1.p1()
647 if c2.rev() is None:
651 if c2.rev() is None:
648 c2 = c2.p1()
652 c2 = c2.p1()
649
653
650 copies = {}
654 copies = {}
651
655
652 changedfiles = set()
656 changedfiles = set()
653 m1 = c1.manifest()
657 m1 = c1.manifest()
654 if not repo.revs('%d::%d', base.rev(), c2.rev()):
658 if not repo.revs('%d::%d', base.rev(), c2.rev()):
655 # If base is not in c2 branch, we switch to fullcopytracing
659 # If base is not in c2 branch, we switch to fullcopytracing
656 repo.ui.debug("switching to full copytracing as base is not "
660 repo.ui.debug("switching to full copytracing as base is not "
657 "an ancestor of c2\n")
661 "an ancestor of c2\n")
658 return _fullcopytracing(repo, c1, c2, base)
662 return _fullcopytracing(repo, c1, c2, base)
659
663
660 ctx = c2
664 ctx = c2
661 while ctx != base:
665 while ctx != base:
662 if len(ctx.parents()) == 2:
666 if len(ctx.parents()) == 2:
663 # To keep things simple let's not handle merges
667 # To keep things simple let's not handle merges
664 repo.ui.debug("switching to full copytracing because of merges\n")
668 repo.ui.debug("switching to full copytracing because of merges\n")
665 return _fullcopytracing(repo, c1, c2, base)
669 return _fullcopytracing(repo, c1, c2, base)
666 changedfiles.update(ctx.files())
670 changedfiles.update(ctx.files())
667 ctx = ctx.p1()
671 ctx = ctx.p1()
668
672
669 cp = _forwardcopies(base, c2)
673 cp = _forwardcopies(base, c2)
670 for dst, src in cp.iteritems():
674 for dst, src in cp.iteritems():
671 if src in m1:
675 if src in m1:
672 copies[dst] = src
676 copies[dst] = src
673
677
674 # file is missing if it isn't present in the destination, but is present in
678 # file is missing if it isn't present in the destination, but is present in
675 # the base and present in the source.
679 # the base and present in the source.
676 # Presence in the base is important to exclude added files, presence in the
680 # Presence in the base is important to exclude added files, presence in the
677 # source is important to exclude removed files.
681 # source is important to exclude removed files.
678 missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
682 missingfiles = filter(lambda f: f not in m1 and f in base and f in c2,
679 changedfiles)
683 changedfiles)
680
684
681 if missingfiles:
685 if missingfiles:
682 basenametofilename = collections.defaultdict(list)
686 basenametofilename = collections.defaultdict(list)
683 dirnametofilename = collections.defaultdict(list)
687 dirnametofilename = collections.defaultdict(list)
684
688
685 for f in m1.filesnotin(base.manifest()):
689 for f in m1.filesnotin(base.manifest()):
686 basename = os.path.basename(f)
690 basename = os.path.basename(f)
687 dirname = os.path.dirname(f)
691 dirname = os.path.dirname(f)
688 basenametofilename[basename].append(f)
692 basenametofilename[basename].append(f)
689 dirnametofilename[dirname].append(f)
693 dirnametofilename[dirname].append(f)
690
694
691 # in case of a rebase/graft, base may not be a common ancestor
695 # in case of a rebase/graft, base may not be a common ancestor
692 anc = c1.ancestor(c2)
696 anc = c1.ancestor(c2)
693
697
694 for f in missingfiles:
698 for f in missingfiles:
695 basename = os.path.basename(f)
699 basename = os.path.basename(f)
696 dirname = os.path.dirname(f)
700 dirname = os.path.dirname(f)
697 samebasename = basenametofilename[basename]
701 samebasename = basenametofilename[basename]
698 samedirname = dirnametofilename[dirname]
702 samedirname = dirnametofilename[dirname]
699 movecandidates = samebasename + samedirname
703 movecandidates = samebasename + samedirname
700 # f is guaranteed to be present in c2, that's why
704 # f is guaranteed to be present in c2, that's why
701 # c2.filectx(f) won't fail
705 # c2.filectx(f) won't fail
702 f2 = c2.filectx(f)
706 f2 = c2.filectx(f)
703 for candidate in movecandidates:
707 for candidate in movecandidates:
704 f1 = c1.filectx(candidate)
708 f1 = c1.filectx(candidate)
705 if _related(f1, f2, anc.rev()):
709 if _related(f1, f2, anc.rev()):
706 # if there are a few related copies then we'll merge
710 # if there are a few related copies then we'll merge
707 # changes into all of them. This matches the behaviour
711 # changes into all of them. This matches the behaviour
708 # of upstream copytracing
712 # of upstream copytracing
709 copies[candidate] = f
713 copies[candidate] = f
710
714
711 return copies, {}, {}, {}, {}
715 return copies, {}, {}, {}, {}
712
716
713 def _related(f1, f2, limit):
717 def _related(f1, f2, limit):
714 """return True if f1 and f2 filectx have a common ancestor
718 """return True if f1 and f2 filectx have a common ancestor
715
719
716 Walk back to common ancestor to see if the two files originate
720 Walk back to common ancestor to see if the two files originate
717 from the same file. Since workingfilectx's rev() is None it messes
721 from the same file. Since workingfilectx's rev() is None it messes
718 up the integer comparison logic, hence the pre-step check for
722 up the integer comparison logic, hence the pre-step check for
719 None (f1 and f2 can only be workingfilectx's initially).
723 None (f1 and f2 can only be workingfilectx's initially).
720 """
724 """
721
725
722 if f1 == f2:
726 if f1 == f2:
723 return f1 # a match
727 return f1 # a match
724
728
725 g1, g2 = f1.ancestors(), f2.ancestors()
729 g1, g2 = f1.ancestors(), f2.ancestors()
726 try:
730 try:
727 f1r, f2r = f1.linkrev(), f2.linkrev()
731 f1r, f2r = f1.linkrev(), f2.linkrev()
728
732
729 if f1r is None:
733 if f1r is None:
730 f1 = next(g1)
734 f1 = next(g1)
731 if f2r is None:
735 if f2r is None:
732 f2 = next(g2)
736 f2 = next(g2)
733
737
734 while True:
738 while True:
735 f1r, f2r = f1.linkrev(), f2.linkrev()
739 f1r, f2r = f1.linkrev(), f2.linkrev()
736 if f1r > f2r:
740 if f1r > f2r:
737 f1 = next(g1)
741 f1 = next(g1)
738 elif f2r > f1r:
742 elif f2r > f1r:
739 f2 = next(g2)
743 f2 = next(g2)
740 elif f1 == f2:
744 elif f1 == f2:
741 return f1 # a match
745 return f1 # a match
742 elif f1r == f2r or f1r < limit or f2r < limit:
746 elif f1r == f2r or f1r < limit or f2r < limit:
743 return False # copy no longer relevant
747 return False # copy no longer relevant
744 except StopIteration:
748 except StopIteration:
745 return False
749 return False
746
750
747 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
751 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
748 """
752 """
749 check possible copies of f from msrc to mdst
753 check possible copies of f from msrc to mdst
750
754
751 srcctx = starting context for f in msrc
755 srcctx = starting context for f in msrc
752 dstctx = destination context for f in mdst
756 dstctx = destination context for f in mdst
753 f = the filename to check (as in msrc)
757 f = the filename to check (as in msrc)
754 base = the changectx used as a merge base
758 base = the changectx used as a merge base
755 tca = topological common ancestor for graft-like scenarios
759 tca = topological common ancestor for graft-like scenarios
756 remotebase = True if base is outside tca::srcctx, False otherwise
760 remotebase = True if base is outside tca::srcctx, False otherwise
757 limit = the rev number to not search beyond
761 limit = the rev number to not search beyond
758 data = dictionary of dictionary to store copy data. (see mergecopies)
762 data = dictionary of dictionary to store copy data. (see mergecopies)
759
763
760 note: limit is only an optimization, and provides no guarantee that
764 note: limit is only an optimization, and provides no guarantee that
761 irrelevant revisions will not be visited
765 irrelevant revisions will not be visited
762 there is no easy way to make this algorithm stop in a guaranteed way
766 there is no easy way to make this algorithm stop in a guaranteed way
763 once it "goes behind a certain revision".
767 once it "goes behind a certain revision".
764 """
768 """
765
769
766 msrc = srcctx.manifest()
770 msrc = srcctx.manifest()
767 mdst = dstctx.manifest()
771 mdst = dstctx.manifest()
768 mb = base.manifest()
772 mb = base.manifest()
769 mta = tca.manifest()
773 mta = tca.manifest()
770 # Might be true if this call is about finding backward renames,
774 # Might be true if this call is about finding backward renames,
771 # This happens in the case of grafts because the DAG is then rotated.
775 # This happens in the case of grafts because the DAG is then rotated.
772 # If the file exists in both the base and the source, we are not looking
776 # If the file exists in both the base and the source, we are not looking
773 # for a rename on the source side, but on the part of the DAG that is
777 # for a rename on the source side, but on the part of the DAG that is
774 # traversed backwards.
778 # traversed backwards.
775 #
779 #
776 # In the case there is both backward and forward renames (before and after
780 # In the case there is both backward and forward renames (before and after
777 # the base) this is more complicated as we must detect a divergence.
781 # the base) this is more complicated as we must detect a divergence.
778 # We use 'backwards = False' in that case.
782 # We use 'backwards = False' in that case.
779 backwards = not remotebase and base != tca and f in mb
783 backwards = not remotebase and base != tca and f in mb
780 getsrcfctx = _makegetfctx(srcctx)
784 getsrcfctx = _makegetfctx(srcctx)
781 getdstfctx = _makegetfctx(dstctx)
785 getdstfctx = _makegetfctx(dstctx)
782
786
783 if msrc[f] == mb.get(f) and not remotebase:
787 if msrc[f] == mb.get(f) and not remotebase:
784 # Nothing to merge
788 # Nothing to merge
785 return
789 return
786
790
787 of = None
791 of = None
788 seen = {f}
792 seen = {f}
789 for oc in getsrcfctx(f, msrc[f]).ancestors():
793 for oc in getsrcfctx(f, msrc[f]).ancestors():
790 ocr = oc.linkrev()
794 ocr = oc.linkrev()
791 of = oc.path()
795 of = oc.path()
792 if of in seen:
796 if of in seen:
793 # check limit late - grab last rename before
797 # check limit late - grab last rename before
794 if ocr < limit:
798 if ocr < limit:
795 break
799 break
796 continue
800 continue
797 seen.add(of)
801 seen.add(of)
798
802
799 # remember for dir rename detection
803 # remember for dir rename detection
800 if backwards:
804 if backwards:
801 data['fullcopy'][of] = f # grafting backwards through renames
805 data['fullcopy'][of] = f # grafting backwards through renames
802 else:
806 else:
803 data['fullcopy'][f] = of
807 data['fullcopy'][f] = of
804 if of not in mdst:
808 if of not in mdst:
805 continue # no match, keep looking
809 continue # no match, keep looking
806 if mdst[of] == mb.get(of):
810 if mdst[of] == mb.get(of):
807 return # no merge needed, quit early
811 return # no merge needed, quit early
808 c2 = getdstfctx(of, mdst[of])
812 c2 = getdstfctx(of, mdst[of])
809 # c2 might be a plain new file on added on destination side that is
813 # c2 might be a plain new file on added on destination side that is
810 # unrelated to the droids we are looking for.
814 # unrelated to the droids we are looking for.
811 cr = _related(oc, c2, tca.rev())
815 cr = _related(oc, c2, tca.rev())
812 if cr and (of == f or of == c2.path()): # non-divergent
816 if cr and (of == f or of == c2.path()): # non-divergent
813 if backwards:
817 if backwards:
814 data['copy'][of] = f
818 data['copy'][of] = f
815 elif of in mb:
819 elif of in mb:
816 data['copy'][f] = of
820 data['copy'][f] = of
817 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
821 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
818 data['copy'][of] = f
822 data['copy'][of] = f
819 del data['fullcopy'][f]
823 del data['fullcopy'][f]
820 data['fullcopy'][of] = f
824 data['fullcopy'][of] = f
821 else: # divergence w.r.t. graft CA on one side of topological CA
825 else: # divergence w.r.t. graft CA on one side of topological CA
822 for sf in seen:
826 for sf in seen:
823 if sf in mb:
827 if sf in mb:
824 assert sf not in data['diverge']
828 assert sf not in data['diverge']
825 data['diverge'][sf] = [f, of]
829 data['diverge'][sf] = [f, of]
826 break
830 break
827 return
831 return
828
832
829 if of in mta:
833 if of in mta:
830 if backwards or remotebase:
834 if backwards or remotebase:
831 data['incomplete'][of] = f
835 data['incomplete'][of] = f
832 else:
836 else:
833 for sf in seen:
837 for sf in seen:
834 if sf in mb:
838 if sf in mb:
835 if tca == base:
839 if tca == base:
836 data['diverge'].setdefault(sf, []).append(f)
840 data['diverge'].setdefault(sf, []).append(f)
837 else:
841 else:
838 data['incompletediverge'][sf] = [of, f]
842 data['incompletediverge'][sf] = [of, f]
839 return
843 return
840
844
841 def duplicatecopies(repo, rev, fromrev, skiprev=None):
845 def duplicatecopies(repo, rev, fromrev, skiprev=None):
842 '''reproduce copies from fromrev to rev in the dirstate
846 '''reproduce copies from fromrev to rev in the dirstate
843
847
844 If skiprev is specified, it's a revision that should be used to
848 If skiprev is specified, it's a revision that should be used to
845 filter copy records. Any copies that occur between fromrev and
849 filter copy records. Any copies that occur between fromrev and
846 skiprev will not be duplicated, even if they appear in the set of
850 skiprev will not be duplicated, even if they appear in the set of
847 copies between fromrev and rev.
851 copies between fromrev and rev.
848 '''
852 '''
849 exclude = {}
853 exclude = {}
850 if (skiprev is not None and
854 if (skiprev is not None and
851 repo.ui.config('experimental', 'copytrace') != 'off'):
855 repo.ui.config('experimental', 'copytrace') != 'off'):
852 # copytrace='off' skips this line, but not the entire function because
856 # copytrace='off' skips this line, but not the entire function because
853 # the line below is O(size of the repo) during a rebase, while the rest
857 # the line below is O(size of the repo) during a rebase, while the rest
854 # of the function is much faster (and is required for carrying copy
858 # of the function is much faster (and is required for carrying copy
855 # metadata across the rebase anyway).
859 # metadata across the rebase anyway).
856 exclude = pathcopies(repo[fromrev], repo[skiprev])
860 exclude = pathcopies(repo[fromrev], repo[skiprev])
857 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
861 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
858 # copies.pathcopies returns backward renames, so dst might not
862 # copies.pathcopies returns backward renames, so dst might not
859 # actually be in the dirstate
863 # actually be in the dirstate
860 if dst in exclude:
864 if dst in exclude:
861 continue
865 continue
862 if repo.dirstate[dst] in "nma":
866 if repo.dirstate[dst] in "nma":
863 repo.dirstate.copy(src, dst)
867 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now