copies: consider nullrev a common ancestor...
Martin von Zweigbergk
r41437:b80af070 default
@@ -1,922 +1,911 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    scmutil,
    util,
)
from .utils import (
    stringutil,
)

def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
-    None if no such revision exists.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    if a is None:
        a = node.wdirrev
    if b is None:
        b = node.wdirrev

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
-    hascommonancestor = False
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
+        if parents[1] == node.nullrev:
+            parents = parents[:1]
        for p in parents:
-            if p < 0:
-                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
-                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

-    if not hascommonancestor:
-        return None
-
    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)

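To see the side-marking walk in action, here is a minimal, self-contained sketch of the loop above on toy data. The parent map and revision numbers are hypothetical; it mirrors the post-change code, with nullrev second parents already absent from the parent lists.

import heapq

def findlimit_toy(parents, a, b):
    # parents: dict mapping rev -> list of parent revs (toy stand-in for
    # changelog.parentrevs, with nullrev second parents trimmed)
    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = max(a, b)
    while interesting:
        r = -heapq.heappop(visit)
        for p in parents[r]:
            if p not in side:
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                side[p] = 0          # p has children on both sides
                interesting -= 1
        if side[r]:
            limit = r                # lowest one-sided rev seen so far
            interesting -= 1
    return min(limit, a, b)

# The graph from the issue4405 comment above: 0 <- 1, 0 <- 2 <- 3
print(findlimit_toy({0: [], 1: [0], 2: [0], 3: [2]}, 3, 0))  # -> 0

The walk alone stops at rev 2, but taking min(limit, a, b) pulls the answer down to 0 so the copy recorded in rev 1 is not missed.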
def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    # remove criss-crossed copies
    for k, v in list(t.items()):
        if k in src and v in dst:
            del t[k]

    return t

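As an illustration of the chaining rules, suppose 'x' was renamed to 'y' between the base and an intermediate revision, and 'y' to 'z' between that revision and the destination. A minimal sketch of the same logic on plain dicts (sets stand in for the src/dst contexts, which are only probed with `in`):

def chain_toy(src, dst, a, b):
    # same algorithm as _chain above, on plain data
    t = dict(a)
    for k, v in b.items():
        if v in t:
            if t[v] != k:
                t[k] = t[v]        # follow the chain k -> v -> t[v]
            if v not in dst:
                del t[v]           # intermediate name no longer exists
        if v in src:
            t[k] = v               # direct copy of a surviving file
    for k, v in list(t.items()):
        if k in src and v in dst:  # criss-crossed copy, drop it
            del t[k]
    return t

src, dst = {'x'}, {'z'}
print(chain_toy(src, dst, {'y': 'x'}, {'z': 'y'}))  # -> {'z': 'x'}

The intermediate name 'y' is dropped because the first step was a rename (y is absent from dst) rather than a copy of a file still present in src.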
def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None

def _dirstatecopies(d, match=None):
    ds = d._repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
-    if limit is None:
-        limit = node.nullrev
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)

def _backwardrenames(a, b):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))

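A hedged usage sketch of this entry point: reading the copies between two changesets from the Python API. The repository path and revision numbers are hypothetical, and Mercurial's own modules are assumed importable.

from mercurial import hg, ui as uimod
from mercurial import copies as copiesmod

repo = hg.repository(uimod.ui.load(), '/path/to/repo')
rev1, rev2 = 10, 12   # hypothetical revisions, rev1 an ancestor of rev2
# {dst@rev2: src@rev1} for the directed comparison rev1 -> rev2
renames = copiesmod.pathcopies(repo[rev1], repo[rev2])
for dst, src in sorted(renames.items()):
    print('%s <- %s' % (dst, src))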
def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    return u1, u2

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths"""
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder

def mergecopies(repo, c1, c2, base):
    """
    Calls the copytracing algorithm selected by config to find moves and
    copies between context c1 and c2 that are relevant for merging. 'base'
    will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, in which case we leave copytracing enabled
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)

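A sketch of how a merge-like caller might consume the five dicts, continuing the hypothetical `repo` and `copiesmod` from the pathcopies example (the revision numbers are made up):

copy, movewithdir, diverge, renamedelete, dirmove = copiesmod.mergecopies(
    repo, repo[21], repo[22], repo[20])   # two heads and their merge base
for dst, src in copy.items():
    print('merge %s with the contents of %s' % (dst, src))
for src, dsts in diverge.items():
    print('warning: %s diverged into %s' % (src, ', '.join(dsts)))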
def _isfullcopytraceable(repo, c1, base):
    """ Checks whether base, source and destination are all non-public
    branches; if so, use the full copytrace algorithm for increased
    capabilities, since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if there are more changesets
    than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

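The cutoff is an ordinary config knob; a hedged sketch of raising it programmatically before a merge (the value 200 and the source label are arbitrary):

# equivalent to setting copytrace.sourcecommitlimit = 200 in the
# [experimental] section of an hgrc file
repo.ui.setconfig('experimental', 'copytrace.sourcecommitlimit', 200,
                  'example')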
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case;
    # we can't just compute _c1.ancestor(_c2) and compare it to ca, because
    # there can be multiple common ancestors, e.g. in case of bidmerge.
    # Because our caller may not know if the revision passed in lieu of the CA
    # is a genuine common ancestor or not without explicitly checking it, it's
    # better to determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
-    if limit is None:
-        # no common ancestor, no copies
-        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

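The directory-move detection near the end of the function reduces to a small invariant check over dirnames; a self-contained toy version with hypothetical paths:

import os

fullcopy = {'newdir/x': 'olddir/x', 'newdir/y': 'olddir/y'}  # dst -> src
d1 = {'newdir'}   # directories present in c1: everything was moved locally
d2 = {'olddir'}   # directories present in c2
invalid, dirmove = set(), {}
for dst, src in fullcopy.items():
    dsrc, ddst = os.path.dirname(src), os.path.dirname(dst)
    if dsrc in invalid:
        continue                      # already seen to be uninteresting
    elif dsrc in d1 and ddst in d1:
        invalid.add(dsrc)             # dir wasn't entirely moved locally
    elif dsrc in d2 and ddst in d2:
        invalid.add(dsrc)             # dir wasn't entirely moved remotely
    elif dsrc in dirmove and dirmove[dsrc] != ddst:
        invalid.add(dsrc)             # moved to two different places
    else:
        dirmove[dsrc] = ddst
dirmove = {k: v for k, v in dirmove.items() if k not in invalid}
print(dirmove)  # -> {'olddir': 'newdir'}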
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of the following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it falls back to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidate moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

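The candidate generation above is just two defaultdict indexes over the files added in c1; a toy version with hypothetical filenames:

import collections
import os

added_in_c1 = ['src/util.py', 'lib/helpers.py']   # m1.filesnotin(base)
bybasename = collections.defaultdict(list)
bydirname = collections.defaultdict(list)
for f in added_in_c1:
    bybasename[os.path.basename(f)].append(f)
    bydirname[os.path.dirname(f)].append(f)

missing = 'lib/util.py'   # in base and c2, but gone from c1
candidates = (bybasename[os.path.basename(missing)] +
              bydirname[os.path.dirname(missing)])
print(candidates)  # -> ['src/util.py', 'lib/helpers.py']

Each candidate is then confirmed or rejected by the ancestry check in _related() below.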
def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

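The try/except walk above advances whichever side currently has the higher linkrev until the two ancestor streams meet or one runs out. A toy version where ancestors are (linkrev, origin-id) pairs, newest first (data hypothetical):

def related_toy(anc1, anc2):
    g1, g2 = iter(anc1), iter(anc2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)      # side 1 is newer, step it back
            elif f2[0] > f1[0]:
                f2 = next(g2)      # side 2 is newer, step it back
            else:
                return f1[1] == f2[1]  # same linkrev: same origin?
    except StopIteration:
        return False               # one stream ran out: unrelated

a = [(7, 'fileA'), (4, 'renamed'), (2, 'root')]
b = [(6, 'fileB'), (2, 'root')]
print(related_toy(a, b))  # -> True: both reach 'root' at linkrev 2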
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionaries to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames.
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there are both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
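A hedged sketch of the typical call site during a rebase-like operation, continuing the hypothetical `repo` and `copiesmod` from the earlier examples (revision numbers are made up):

wctx = repo[None]  # working context whose dirstate receives the copy records
copiesmod.duplicatecopies(repo, wctx, rev=5, fromrev=3, skiprev=4)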