##// END OF EJS Templates
copies: respect narrowmatcher in "parent -> working dir" case...
Martin von Zweigbergk -
r41918:012f6955 default
parent child Browse files
Show More
@@ -1,917 +1,919 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 def _findlimit(repo, ctxa, ctxb):
27 def _findlimit(repo, ctxa, ctxb):
28 """
28 """
29 Find the last revision that needs to be checked to ensure that a full
29 Find the last revision that needs to be checked to ensure that a full
30 transitive closure for file copies can be properly calculated.
30 transitive closure for file copies can be properly calculated.
31 Generally, this means finding the earliest revision number that's an
31 Generally, this means finding the earliest revision number that's an
32 ancestor of a or b but not both, except when a or b is a direct descendent
32 ancestor of a or b but not both, except when a or b is a direct descendent
33 of the other, in which case we can return the minimum revnum of a and b.
33 of the other, in which case we can return the minimum revnum of a and b.
34 """
34 """
35
35
36 # basic idea:
36 # basic idea:
37 # - mark a and b with different sides
37 # - mark a and b with different sides
38 # - if a parent's children are all on the same side, the parent is
38 # - if a parent's children are all on the same side, the parent is
39 # on that side, otherwise it is on no side
39 # on that side, otherwise it is on no side
40 # - walk the graph in topological order with the help of a heap;
40 # - walk the graph in topological order with the help of a heap;
41 # - add unseen parents to side map
41 # - add unseen parents to side map
42 # - clear side of any parent that has children on different sides
42 # - clear side of any parent that has children on different sides
43 # - track number of interesting revs that might still be on a side
43 # - track number of interesting revs that might still be on a side
44 # - track the lowest interesting rev seen
44 # - track the lowest interesting rev seen
45 # - quit when interesting revs is zero
45 # - quit when interesting revs is zero
46
46
47 cl = repo.changelog
47 cl = repo.changelog
48 wdirparents = None
48 wdirparents = None
49 a = ctxa.rev()
49 a = ctxa.rev()
50 b = ctxb.rev()
50 b = ctxb.rev()
51 if a is None:
51 if a is None:
52 wdirparents = (ctxa.p1(), ctxa.p2())
52 wdirparents = (ctxa.p1(), ctxa.p2())
53 a = node.wdirrev
53 a = node.wdirrev
54 if b is None:
54 if b is None:
55 assert not wdirparents
55 assert not wdirparents
56 wdirparents = (ctxb.p1(), ctxb.p2())
56 wdirparents = (ctxb.p1(), ctxb.p2())
57 b = node.wdirrev
57 b = node.wdirrev
58
58
59 side = {a: -1, b: 1}
59 side = {a: -1, b: 1}
60 visit = [-a, -b]
60 visit = [-a, -b]
61 heapq.heapify(visit)
61 heapq.heapify(visit)
62 interesting = len(visit)
62 interesting = len(visit)
63 limit = node.wdirrev
63 limit = node.wdirrev
64
64
65 while interesting:
65 while interesting:
66 r = -heapq.heappop(visit)
66 r = -heapq.heappop(visit)
67 if r == node.wdirrev:
67 if r == node.wdirrev:
68 parents = [pctx.rev() for pctx in wdirparents]
68 parents = [pctx.rev() for pctx in wdirparents]
69 else:
69 else:
70 parents = cl.parentrevs(r)
70 parents = cl.parentrevs(r)
71 if parents[1] == node.nullrev:
71 if parents[1] == node.nullrev:
72 parents = parents[:1]
72 parents = parents[:1]
73 for p in parents:
73 for p in parents:
74 if p not in side:
74 if p not in side:
75 # first time we see p; add it to visit
75 # first time we see p; add it to visit
76 side[p] = side[r]
76 side[p] = side[r]
77 if side[p]:
77 if side[p]:
78 interesting += 1
78 interesting += 1
79 heapq.heappush(visit, -p)
79 heapq.heappush(visit, -p)
80 elif side[p] and side[p] != side[r]:
80 elif side[p] and side[p] != side[r]:
81 # p was interesting but now we know better
81 # p was interesting but now we know better
82 side[p] = 0
82 side[p] = 0
83 interesting -= 1
83 interesting -= 1
84 if side[r]:
84 if side[r]:
85 limit = r # lowest rev visited
85 limit = r # lowest rev visited
86 interesting -= 1
86 interesting -= 1
87
87
88 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # Consider the following flow (see test-commit-amend.t under issue4405):
89 # 1/ File 'a0' committed
89 # 1/ File 'a0' committed
90 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
91 # 3/ Move back to first commit
91 # 3/ Move back to first commit
92 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
93 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
94 #
94 #
95 # During the amend in step five, we will be in this state:
95 # During the amend in step five, we will be in this state:
96 #
96 #
97 # @ 3 temporary amend commit for a1-amend
97 # @ 3 temporary amend commit for a1-amend
98 # |
98 # |
99 # o 2 a1-amend
99 # o 2 a1-amend
100 # |
100 # |
101 # | o 1 a1
101 # | o 1 a1
102 # |/
102 # |/
103 # o 0 a0
103 # o 0 a0
104 #
104 #
105 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
106 # yet the filelog has the copy information in rev 1 and we will not look
106 # yet the filelog has the copy information in rev 1 and we will not look
107 # back far enough unless we also look at the a and b as candidates.
107 # back far enough unless we also look at the a and b as candidates.
108 # This only occurs when a is a descendent of b or visa-versa.
108 # This only occurs when a is a descendent of b or visa-versa.
109 return min(limit, a, b)
109 return min(limit, a, b)
110
110
111 def _chain(src, dst, a, b):
111 def _chain(src, dst, a, b):
112 """chain two sets of copies a->b"""
112 """chain two sets of copies a->b"""
113 t = a.copy()
113 t = a.copy()
114 for k, v in b.iteritems():
114 for k, v in b.iteritems():
115 if v in t:
115 if v in t:
116 # found a chain
116 # found a chain
117 if t[v] != k:
117 if t[v] != k:
118 # file wasn't renamed back to itself
118 # file wasn't renamed back to itself
119 t[k] = t[v]
119 t[k] = t[v]
120 if v not in dst:
120 if v not in dst:
121 # chain was a rename, not a copy
121 # chain was a rename, not a copy
122 del t[v]
122 del t[v]
123 if v in src:
123 if v in src:
124 # file is a copy of an existing file
124 # file is a copy of an existing file
125 t[k] = v
125 t[k] = v
126
126
127 # remove criss-crossed copies
127 # remove criss-crossed copies
128 for k, v in list(t.items()):
128 for k, v in list(t.items()):
129 if k in src and v in dst:
129 if k in src and v in dst:
130 del t[k]
130 del t[k]
131
131
132 return t
132 return t
133
133
134 def _tracefile(fctx, am, limit=node.nullrev):
134 def _tracefile(fctx, am, limit=node.nullrev):
135 """return file context that is the ancestor of fctx present in ancestor
135 """return file context that is the ancestor of fctx present in ancestor
136 manifest am, stopping after the first ancestor lower than limit"""
136 manifest am, stopping after the first ancestor lower than limit"""
137
137
138 for f in fctx.ancestors():
138 for f in fctx.ancestors():
139 if am.get(f.path(), None) == f.filenode():
139 if am.get(f.path(), None) == f.filenode():
140 return f
140 return f
141 if limit >= 0 and not f.isintroducedafter(limit):
141 if limit >= 0 and not f.isintroducedafter(limit):
142 return None
142 return None
143
143
144 def _dirstatecopies(d, match=None):
144 def _dirstatecopies(repo, match=None):
145 ds = d._repo.dirstate
145 ds = repo.dirstate
146 c = ds.copies().copy()
146 c = ds.copies().copy()
147 for k in list(c):
147 for k in list(c):
148 if ds[k] not in 'anm' or (match and not match(k)):
148 if ds[k] not in 'anm' or (match and not match(k)):
149 del c[k]
149 del c[k]
150 return c
150 return c
151
151
152 def _computeforwardmissing(a, b, match=None):
152 def _computeforwardmissing(a, b, match=None):
153 """Computes which files are in b but not a.
153 """Computes which files are in b but not a.
154 This is its own function so extensions can easily wrap this call to see what
154 This is its own function so extensions can easily wrap this call to see what
155 files _forwardcopies is about to process.
155 files _forwardcopies is about to process.
156 """
156 """
157 ma = a.manifest()
157 ma = a.manifest()
158 mb = b.manifest()
158 mb = b.manifest()
159 return mb.filesnotin(ma, match=match)
159 return mb.filesnotin(ma, match=match)
160
160
161 def _committedforwardcopies(a, b, match):
161 def _committedforwardcopies(a, b, match):
162 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
162 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
163 # files might have to be traced back to the fctx parent of the last
163 # files might have to be traced back to the fctx parent of the last
164 # one-side-only changeset, but not further back than that
164 # one-side-only changeset, but not further back than that
165 repo = a._repo
165 repo = a._repo
166 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
166 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
167 dbg = repo.ui.debug
167 dbg = repo.ui.debug
168 if debug:
168 if debug:
169 dbg('debug.copies: looking into rename from %s to %s\n'
169 dbg('debug.copies: looking into rename from %s to %s\n'
170 % (a, b))
170 % (a, b))
171 limit = _findlimit(repo, a, b)
171 limit = _findlimit(repo, a, b)
172 if debug:
172 if debug:
173 dbg('debug.copies: search limit: %d\n' % limit)
173 dbg('debug.copies: search limit: %d\n' % limit)
174 am = a.manifest()
174 am = a.manifest()
175
175
176 # find where new files came from
176 # find where new files came from
177 # we currently don't try to find where old files went, too expensive
177 # we currently don't try to find where old files went, too expensive
178 # this means we can miss a case like 'hg rm b; hg cp a b'
178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 cm = {}
179 cm = {}
180
180
181 # Computing the forward missing is quite expensive on large manifests, since
181 # Computing the forward missing is quite expensive on large manifests, since
182 # it compares the entire manifests. We can optimize it in the common use
182 # it compares the entire manifests. We can optimize it in the common use
183 # case of computing what copies are in a commit versus its parent (like
183 # case of computing what copies are in a commit versus its parent (like
184 # during a rebase or histedit). Note, we exclude merge commits from this
184 # during a rebase or histedit). Note, we exclude merge commits from this
185 # optimization, since the ctx.files() for a merge commit is not correct for
185 # optimization, since the ctx.files() for a merge commit is not correct for
186 # this comparison.
186 # this comparison.
187 forwardmissingmatch = match
187 forwardmissingmatch = match
188 if b.p1() == a and b.p2().node() == node.nullid:
188 if b.p1() == a and b.p2().node() == node.nullid:
189 filesmatcher = scmutil.matchfiles(a._repo, b.files())
189 filesmatcher = scmutil.matchfiles(a._repo, b.files())
190 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
190 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
191 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
192
192
193 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
194
194
195 if debug:
195 if debug:
196 dbg('debug.copies: missing file to search: %d\n' % len(missing))
196 dbg('debug.copies: missing file to search: %d\n' % len(missing))
197
197
198 for f in missing:
198 for f in missing:
199 if debug:
199 if debug:
200 dbg('debug.copies: tracing file: %s\n' % f)
200 dbg('debug.copies: tracing file: %s\n' % f)
201 fctx = b[f]
201 fctx = b[f]
202 fctx._ancestrycontext = ancestrycontext
202 fctx._ancestrycontext = ancestrycontext
203
203
204 if debug:
204 if debug:
205 start = util.timer()
205 start = util.timer()
206 ofctx = _tracefile(fctx, am, limit)
206 ofctx = _tracefile(fctx, am, limit)
207 if ofctx:
207 if ofctx:
208 if debug:
208 if debug:
209 dbg('debug.copies: rename of: %s\n' % ofctx._path)
209 dbg('debug.copies: rename of: %s\n' % ofctx._path)
210 cm[f] = ofctx.path()
210 cm[f] = ofctx.path()
211 if debug:
211 if debug:
212 dbg('debug.copies: time: %f seconds\n'
212 dbg('debug.copies: time: %f seconds\n'
213 % (util.timer() - start))
213 % (util.timer() - start))
214 return cm
214 return cm
215
215
216 def _forwardcopies(a, b, match=None):
216 def _forwardcopies(a, b, match=None):
217 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
217 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
218
218
219 match = a.repo().narrowmatch(match)
219 match = a.repo().narrowmatch(match)
220 # check for working copy
220 # check for working copy
221 if b.rev() is None:
221 if b.rev() is None:
222 if a == b.p1():
222 if a == b.p1():
223 # short-circuit to avoid issues with merge states
223 # short-circuit to avoid issues with merge states
224 return _dirstatecopies(b, match)
224 return _dirstatecopies(b._repo, match)
225
225
226 cm = _committedforwardcopies(a, b.p1(), match)
226 cm = _committedforwardcopies(a, b.p1(), match)
227 # combine copies from dirstate if necessary
227 # combine copies from dirstate if necessary
228 return _chain(a, b, cm, _dirstatecopies(b, match))
228 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
229 return _committedforwardcopies(a, b, match)
229 return _committedforwardcopies(a, b, match)
230
230
231 def _backwardrenames(a, b):
231 def _backwardrenames(a, b):
232 if a._repo.ui.config('experimental', 'copytrace') == 'off':
232 if a._repo.ui.config('experimental', 'copytrace') == 'off':
233 return {}
233 return {}
234
234
235 # Even though we're not taking copies into account, 1:n rename situations
235 # Even though we're not taking copies into account, 1:n rename situations
236 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
236 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
237 # arbitrarily pick one of the renames.
237 # arbitrarily pick one of the renames.
238 f = _forwardcopies(b, a)
238 f = _forwardcopies(b, a)
239 r = {}
239 r = {}
240 for k, v in sorted(f.iteritems()):
240 for k, v in sorted(f.iteritems()):
241 # remove copies
241 # remove copies
242 if v in a:
242 if v in a:
243 continue
243 continue
244 r[v] = k
244 r[v] = k
245 return r
245 return r
246
246
247 def pathcopies(x, y, match=None):
247 def pathcopies(x, y, match=None):
248 """find {dst@y: src@x} copy mapping for directed compare"""
248 """find {dst@y: src@x} copy mapping for directed compare"""
249 repo = x._repo
249 repo = x._repo
250 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
250 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
251 if debug:
251 if debug:
252 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
252 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
253 % (x, y))
253 % (x, y))
254 if x == y or not x or not y:
254 if x == y or not x or not y:
255 return {}
255 return {}
256 a = y.ancestor(x)
256 a = y.ancestor(x)
257 if a == x:
257 if a == x:
258 if debug:
258 if debug:
259 repo.ui.debug('debug.copies: search mode: forward\n')
259 repo.ui.debug('debug.copies: search mode: forward\n')
260 return _forwardcopies(x, y, match=match)
260 return _forwardcopies(x, y, match=match)
261 if a == y:
261 if a == y:
262 if debug:
262 if debug:
263 repo.ui.debug('debug.copies: search mode: backward\n')
263 repo.ui.debug('debug.copies: search mode: backward\n')
264 return _backwardrenames(x, y)
264 return _backwardrenames(x, y)
265 if debug:
265 if debug:
266 repo.ui.debug('debug.copies: search mode: combined\n')
266 repo.ui.debug('debug.copies: search mode: combined\n')
267 return _chain(x, y, _backwardrenames(x, a),
267 return _chain(x, y, _backwardrenames(x, a),
268 _forwardcopies(a, y, match=match))
268 _forwardcopies(a, y, match=match))
269
269
270 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
270 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
271 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
271 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
272 and c2. This is its own function so extensions can easily wrap this call
272 and c2. This is its own function so extensions can easily wrap this call
273 to see what files mergecopies is about to process.
273 to see what files mergecopies is about to process.
274
274
275 Even though c1 and c2 are not used in this function, they are useful in
275 Even though c1 and c2 are not used in this function, they are useful in
276 other extensions for being able to read the file nodes of the changed files.
276 other extensions for being able to read the file nodes of the changed files.
277
277
278 "baselabel" can be passed to help distinguish the multiple computations
278 "baselabel" can be passed to help distinguish the multiple computations
279 done in the graft case.
279 done in the graft case.
280 """
280 """
281 u1 = sorted(addedinm1 - addedinm2)
281 u1 = sorted(addedinm1 - addedinm2)
282 u2 = sorted(addedinm2 - addedinm1)
282 u2 = sorted(addedinm2 - addedinm1)
283
283
284 header = " unmatched files in %s"
284 header = " unmatched files in %s"
285 if baselabel:
285 if baselabel:
286 header += ' (from %s)' % baselabel
286 header += ' (from %s)' % baselabel
287 if u1:
287 if u1:
288 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
288 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
289 if u2:
289 if u2:
290 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
290 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
291
291
292 return u1, u2
292 return u1, u2
293
293
294 def _makegetfctx(ctx):
294 def _makegetfctx(ctx):
295 """return a 'getfctx' function suitable for _checkcopies usage
295 """return a 'getfctx' function suitable for _checkcopies usage
296
296
297 We have to re-setup the function building 'filectx' for each
297 We have to re-setup the function building 'filectx' for each
298 '_checkcopies' to ensure the linkrev adjustment is properly setup for
298 '_checkcopies' to ensure the linkrev adjustment is properly setup for
299 each. Linkrev adjustment is important to avoid bug in rename
299 each. Linkrev adjustment is important to avoid bug in rename
300 detection. Moreover, having a proper '_ancestrycontext' setup ensures
300 detection. Moreover, having a proper '_ancestrycontext' setup ensures
301 the performance impact of this adjustment is kept limited. Without it,
301 the performance impact of this adjustment is kept limited. Without it,
302 each file could do a full dag traversal making the time complexity of
302 each file could do a full dag traversal making the time complexity of
303 the operation explode (see issue4537).
303 the operation explode (see issue4537).
304
304
305 This function exists here mostly to limit the impact on stable. Feel
305 This function exists here mostly to limit the impact on stable. Feel
306 free to refactor on default.
306 free to refactor on default.
307 """
307 """
308 rev = ctx.rev()
308 rev = ctx.rev()
309 repo = ctx._repo
309 repo = ctx._repo
310 ac = getattr(ctx, '_ancestrycontext', None)
310 ac = getattr(ctx, '_ancestrycontext', None)
311 if ac is None:
311 if ac is None:
312 revs = [rev]
312 revs = [rev]
313 if rev is None:
313 if rev is None:
314 revs = [p.rev() for p in ctx.parents()]
314 revs = [p.rev() for p in ctx.parents()]
315 ac = repo.changelog.ancestors(revs, inclusive=True)
315 ac = repo.changelog.ancestors(revs, inclusive=True)
316 ctx._ancestrycontext = ac
316 ctx._ancestrycontext = ac
317 def makectx(f, n):
317 def makectx(f, n):
318 if n in node.wdirfilenodeids: # in a working context?
318 if n in node.wdirfilenodeids: # in a working context?
319 if ctx.rev() is None:
319 if ctx.rev() is None:
320 return ctx.filectx(f)
320 return ctx.filectx(f)
321 return repo[None][f]
321 return repo[None][f]
322 fctx = repo.filectx(f, fileid=n)
322 fctx = repo.filectx(f, fileid=n)
323 # setup only needed for filectx not create from a changectx
323 # setup only needed for filectx not create from a changectx
324 fctx._ancestrycontext = ac
324 fctx._ancestrycontext = ac
325 fctx._descendantrev = rev
325 fctx._descendantrev = rev
326 return fctx
326 return fctx
327 return util.lrucachefunc(makectx)
327 return util.lrucachefunc(makectx)
328
328
329 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
329 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
330 """combine partial copy paths"""
330 """combine partial copy paths"""
331 remainder = {}
331 remainder = {}
332 for f in copyfrom:
332 for f in copyfrom:
333 if f in copyto:
333 if f in copyto:
334 finalcopy[copyto[f]] = copyfrom[f]
334 finalcopy[copyto[f]] = copyfrom[f]
335 del copyto[f]
335 del copyto[f]
336 for f in incompletediverge:
336 for f in incompletediverge:
337 assert f not in diverge
337 assert f not in diverge
338 ic = incompletediverge[f]
338 ic = incompletediverge[f]
339 if ic[0] in copyto:
339 if ic[0] in copyto:
340 diverge[f] = [copyto[ic[0]], ic[1]]
340 diverge[f] = [copyto[ic[0]], ic[1]]
341 else:
341 else:
342 remainder[f] = ic
342 remainder[f] = ic
343 return remainder
343 return remainder
344
344
345 def mergecopies(repo, c1, c2, base):
345 def mergecopies(repo, c1, c2, base):
346 """
346 """
347 The function calling different copytracing algorithms on the basis of config
347 The function calling different copytracing algorithms on the basis of config
348 which find moves and copies between context c1 and c2 that are relevant for
348 which find moves and copies between context c1 and c2 that are relevant for
349 merging. 'base' will be used as the merge base.
349 merging. 'base' will be used as the merge base.
350
350
351 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
351 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
352 files that were moved/ copied in one merge parent and modified in another.
352 files that were moved/ copied in one merge parent and modified in another.
353 For example:
353 For example:
354
354
355 o ---> 4 another commit
355 o ---> 4 another commit
356 |
356 |
357 | o ---> 3 commit that modifies a.txt
357 | o ---> 3 commit that modifies a.txt
358 | /
358 | /
359 o / ---> 2 commit that moves a.txt to b.txt
359 o / ---> 2 commit that moves a.txt to b.txt
360 |/
360 |/
361 o ---> 1 merge base
361 o ---> 1 merge base
362
362
363 If we try to rebase revision 3 on revision 4, since there is no a.txt in
363 If we try to rebase revision 3 on revision 4, since there is no a.txt in
364 revision 4, and if user have copytrace disabled, we prints the following
364 revision 4, and if user have copytrace disabled, we prints the following
365 message:
365 message:
366
366
367 ```other changed <file> which local deleted```
367 ```other changed <file> which local deleted```
368
368
369 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
369 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
370 "dirmove".
370 "dirmove".
371
371
372 "copy" is a mapping from destination name -> source name,
372 "copy" is a mapping from destination name -> source name,
373 where source is in c1 and destination is in c2 or vice-versa.
373 where source is in c1 and destination is in c2 or vice-versa.
374
374
375 "movewithdir" is a mapping from source name -> destination name,
375 "movewithdir" is a mapping from source name -> destination name,
376 where the file at source present in one context but not the other
376 where the file at source present in one context but not the other
377 needs to be moved to destination by the merge process, because the
377 needs to be moved to destination by the merge process, because the
378 other context moved the directory it is in.
378 other context moved the directory it is in.
379
379
380 "diverge" is a mapping of source name -> list of destination names
380 "diverge" is a mapping of source name -> list of destination names
381 for divergent renames.
381 for divergent renames.
382
382
383 "renamedelete" is a mapping of source name -> list of destination
383 "renamedelete" is a mapping of source name -> list of destination
384 names for files deleted in c1 that were renamed in c2 or vice-versa.
384 names for files deleted in c1 that were renamed in c2 or vice-versa.
385
385
386 "dirmove" is a mapping of detected source dir -> destination dir renames.
386 "dirmove" is a mapping of detected source dir -> destination dir renames.
387 This is needed for handling changes to new files previously grafted into
387 This is needed for handling changes to new files previously grafted into
388 renamed directories.
388 renamed directories.
389 """
389 """
390 # avoid silly behavior for update from empty dir
390 # avoid silly behavior for update from empty dir
391 if not c1 or not c2 or c1 == c2:
391 if not c1 or not c2 or c1 == c2:
392 return {}, {}, {}, {}, {}
392 return {}, {}, {}, {}, {}
393
393
394 narrowmatch = c1.repo().narrowmatch()
395
394 # avoid silly behavior for parent -> working dir
396 # avoid silly behavior for parent -> working dir
395 if c2.node() is None and c1.node() == repo.dirstate.p1():
397 if c2.node() is None and c1.node() == repo.dirstate.p1():
396 return repo.dirstate.copies(), {}, {}, {}, {}
398 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
397
399
398 copytracing = repo.ui.config('experimental', 'copytrace')
400 copytracing = repo.ui.config('experimental', 'copytrace')
399 boolctrace = stringutil.parsebool(copytracing)
401 boolctrace = stringutil.parsebool(copytracing)
400
402
401 # Copy trace disabling is explicitly below the node == p1 logic above
403 # Copy trace disabling is explicitly below the node == p1 logic above
402 # because the logic above is required for a simple copy to be kept across a
404 # because the logic above is required for a simple copy to be kept across a
403 # rebase.
405 # rebase.
404 if copytracing == 'heuristics':
406 if copytracing == 'heuristics':
405 # Do full copytracing if only non-public revisions are involved as
407 # Do full copytracing if only non-public revisions are involved as
406 # that will be fast enough and will also cover the copies which could
408 # that will be fast enough and will also cover the copies which could
407 # be missed by heuristics
409 # be missed by heuristics
408 if _isfullcopytraceable(repo, c1, base):
410 if _isfullcopytraceable(repo, c1, base):
409 return _fullcopytracing(repo, c1, c2, base)
411 return _fullcopytracing(repo, c1, c2, base)
410 return _heuristicscopytracing(repo, c1, c2, base)
412 return _heuristicscopytracing(repo, c1, c2, base)
411 elif boolctrace is False:
413 elif boolctrace is False:
412 # stringutil.parsebool() returns None when it is unable to parse the
414 # stringutil.parsebool() returns None when it is unable to parse the
413 # value, so we should rely on making sure copytracing is on such cases
415 # value, so we should rely on making sure copytracing is on such cases
414 return {}, {}, {}, {}, {}
416 return {}, {}, {}, {}, {}
415 else:
417 else:
416 return _fullcopytracing(repo, c1, c2, base)
418 return _fullcopytracing(repo, c1, c2, base)
417
419
418 def _isfullcopytraceable(repo, c1, base):
420 def _isfullcopytraceable(repo, c1, base):
419 """ Checks that if base, source and destination are all no-public branches,
421 """ Checks that if base, source and destination are all no-public branches,
420 if yes let's use the full copytrace algorithm for increased capabilities
422 if yes let's use the full copytrace algorithm for increased capabilities
421 since it will be fast enough.
423 since it will be fast enough.
422
424
423 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
424 number of changesets from c1 to base such that if number of changesets are
426 number of changesets from c1 to base such that if number of changesets are
425 more than the limit, full copytracing algorithm won't be used.
427 more than the limit, full copytracing algorithm won't be used.
426 """
428 """
427 if c1.rev() is None:
429 if c1.rev() is None:
428 c1 = c1.p1()
430 c1 = c1.p1()
429 if c1.mutable() and base.mutable():
431 if c1.mutable() and base.mutable():
430 sourcecommitlimit = repo.ui.configint('experimental',
432 sourcecommitlimit = repo.ui.configint('experimental',
431 'copytrace.sourcecommitlimit')
433 'copytrace.sourcecommitlimit')
432 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
433 return commits < sourcecommitlimit
435 return commits < sourcecommitlimit
434 return False
436 return False
435
437
436 def _fullcopytracing(repo, c1, c2, base):
438 def _fullcopytracing(repo, c1, c2, base):
437 """ The full copytracing algorithm which finds all the new files that were
439 """ The full copytracing algorithm which finds all the new files that were
438 added from merge base up to the top commit and for each file it checks if
440 added from merge base up to the top commit and for each file it checks if
439 this file was copied from another file.
441 this file was copied from another file.
440
442
441 This is pretty slow when a lot of changesets are involved but will track all
443 This is pretty slow when a lot of changesets are involved but will track all
442 the copies.
444 the copies.
443 """
445 """
444 # In certain scenarios (e.g. graft, update or rebase), base can be
446 # In certain scenarios (e.g. graft, update or rebase), base can be
445 # overridden We still need to know a real common ancestor in this case We
447 # overridden We still need to know a real common ancestor in this case We
446 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
448 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
447 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
449 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
448 # caller may not know if the revision passed in lieu of the CA is a genuine
450 # caller may not know if the revision passed in lieu of the CA is a genuine
449 # common ancestor or not without explicitly checking it, it's better to
451 # common ancestor or not without explicitly checking it, it's better to
450 # determine that here.
452 # determine that here.
451 #
453 #
452 # base.isancestorof(wc) is False, work around that
454 # base.isancestorof(wc) is False, work around that
453 _c1 = c1.p1() if c1.rev() is None else c1
455 _c1 = c1.p1() if c1.rev() is None else c1
454 _c2 = c2.p1() if c2.rev() is None else c2
456 _c2 = c2.p1() if c2.rev() is None else c2
455 # an endpoint is "dirty" if it isn't a descendant of the merge base
457 # an endpoint is "dirty" if it isn't a descendant of the merge base
456 # if we have a dirty endpoint, we need to trigger graft logic, and also
458 # if we have a dirty endpoint, we need to trigger graft logic, and also
457 # keep track of which endpoint is dirty
459 # keep track of which endpoint is dirty
458 dirtyc1 = not base.isancestorof(_c1)
460 dirtyc1 = not base.isancestorof(_c1)
459 dirtyc2 = not base.isancestorof(_c2)
461 dirtyc2 = not base.isancestorof(_c2)
460 graft = dirtyc1 or dirtyc2
462 graft = dirtyc1 or dirtyc2
461 tca = base
463 tca = base
462 if graft:
464 if graft:
463 tca = _c1.ancestor(_c2)
465 tca = _c1.ancestor(_c2)
464
466
465 limit = _findlimit(repo, c1, c2)
467 limit = _findlimit(repo, c1, c2)
466 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
468 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
467
469
468 m1 = c1.manifest()
470 m1 = c1.manifest()
469 m2 = c2.manifest()
471 m2 = c2.manifest()
470 mb = base.manifest()
472 mb = base.manifest()
471
473
472 # gather data from _checkcopies:
474 # gather data from _checkcopies:
473 # - diverge = record all diverges in this dict
475 # - diverge = record all diverges in this dict
474 # - copy = record all non-divergent copies in this dict
476 # - copy = record all non-divergent copies in this dict
475 # - fullcopy = record all copies in this dict
477 # - fullcopy = record all copies in this dict
476 # - incomplete = record non-divergent partial copies here
478 # - incomplete = record non-divergent partial copies here
477 # - incompletediverge = record divergent partial copies here
479 # - incompletediverge = record divergent partial copies here
478 diverge = {} # divergence data is shared
480 diverge = {} # divergence data is shared
479 incompletediverge = {}
481 incompletediverge = {}
480 data1 = {'copy': {},
482 data1 = {'copy': {},
481 'fullcopy': {},
483 'fullcopy': {},
482 'incomplete': {},
484 'incomplete': {},
483 'diverge': diverge,
485 'diverge': diverge,
484 'incompletediverge': incompletediverge,
486 'incompletediverge': incompletediverge,
485 }
487 }
486 data2 = {'copy': {},
488 data2 = {'copy': {},
487 'fullcopy': {},
489 'fullcopy': {},
488 'incomplete': {},
490 'incomplete': {},
489 'diverge': diverge,
491 'diverge': diverge,
490 'incompletediverge': incompletediverge,
492 'incompletediverge': incompletediverge,
491 }
493 }
492
494
493 # find interesting file sets from manifests
495 # find interesting file sets from manifests
494 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
496 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
495 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
497 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
496 bothnew = sorted(addedinm1 & addedinm2)
498 bothnew = sorted(addedinm1 & addedinm2)
497 if tca == base:
499 if tca == base:
498 # unmatched file from base
500 # unmatched file from base
499 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
501 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
500 u1u, u2u = u1r, u2r
502 u1u, u2u = u1r, u2r
501 else:
503 else:
502 # unmatched file from base (DAG rotation in the graft case)
504 # unmatched file from base (DAG rotation in the graft case)
503 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
504 baselabel='base')
506 baselabel='base')
505 # unmatched file from topological common ancestors (no DAG rotation)
507 # unmatched file from topological common ancestors (no DAG rotation)
506 # need to recompute this for directory move handling when grafting
508 # need to recompute this for directory move handling when grafting
507 mta = tca.manifest()
509 mta = tca.manifest()
508 u1u, u2u = _computenonoverlap(repo, c1, c2,
510 u1u, u2u = _computenonoverlap(repo, c1, c2,
509 m1.filesnotin(mta, repo.narrowmatch()),
511 m1.filesnotin(mta, repo.narrowmatch()),
510 m2.filesnotin(mta, repo.narrowmatch()),
512 m2.filesnotin(mta, repo.narrowmatch()),
511 baselabel='topological common ancestor')
513 baselabel='topological common ancestor')
512
514
513 for f in u1u:
515 for f in u1u:
514 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
516 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
515
517
516 for f in u2u:
518 for f in u2u:
517 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
519 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
518
520
519 copy = dict(data1['copy'])
521 copy = dict(data1['copy'])
520 copy.update(data2['copy'])
522 copy.update(data2['copy'])
521 fullcopy = dict(data1['fullcopy'])
523 fullcopy = dict(data1['fullcopy'])
522 fullcopy.update(data2['fullcopy'])
524 fullcopy.update(data2['fullcopy'])
523
525
524 if dirtyc1:
526 if dirtyc1:
525 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
527 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
526 incompletediverge)
528 incompletediverge)
527 else:
529 else:
528 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
530 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
529 incompletediverge)
531 incompletediverge)
530
532
531 renamedelete = {}
533 renamedelete = {}
532 renamedeleteset = set()
534 renamedeleteset = set()
533 divergeset = set()
535 divergeset = set()
534 for of, fl in list(diverge.items()):
536 for of, fl in list(diverge.items()):
535 if len(fl) == 1 or of in c1 or of in c2:
537 if len(fl) == 1 or of in c1 or of in c2:
536 del diverge[of] # not actually divergent, or not a rename
538 del diverge[of] # not actually divergent, or not a rename
537 if of not in c1 and of not in c2:
539 if of not in c1 and of not in c2:
538 # renamed on one side, deleted on the other side, but filter
540 # renamed on one side, deleted on the other side, but filter
539 # out files that have been renamed and then deleted
541 # out files that have been renamed and then deleted
540 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
542 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
541 renamedeleteset.update(fl) # reverse map for below
543 renamedeleteset.update(fl) # reverse map for below
542 else:
544 else:
543 divergeset.update(fl) # reverse map for below
545 divergeset.update(fl) # reverse map for below
544
546
545 if bothnew:
547 if bothnew:
546 repo.ui.debug(" unmatched files new in both:\n %s\n"
548 repo.ui.debug(" unmatched files new in both:\n %s\n"
547 % "\n ".join(bothnew))
549 % "\n ".join(bothnew))
548 bothdiverge = {}
550 bothdiverge = {}
549 bothincompletediverge = {}
551 bothincompletediverge = {}
550 remainder = {}
552 remainder = {}
551 both1 = {'copy': {},
553 both1 = {'copy': {},
552 'fullcopy': {},
554 'fullcopy': {},
553 'incomplete': {},
555 'incomplete': {},
554 'diverge': bothdiverge,
556 'diverge': bothdiverge,
555 'incompletediverge': bothincompletediverge
557 'incompletediverge': bothincompletediverge
556 }
558 }
557 both2 = {'copy': {},
559 both2 = {'copy': {},
558 'fullcopy': {},
560 'fullcopy': {},
559 'incomplete': {},
561 'incomplete': {},
560 'diverge': bothdiverge,
562 'diverge': bothdiverge,
561 'incompletediverge': bothincompletediverge
563 'incompletediverge': bothincompletediverge
562 }
564 }
563 for f in bothnew:
565 for f in bothnew:
564 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
566 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
565 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
567 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
566 if dirtyc1:
568 if dirtyc1:
567 # incomplete copies may only be found on the "dirty" side for bothnew
569 # incomplete copies may only be found on the "dirty" side for bothnew
568 assert not both2['incomplete']
570 assert not both2['incomplete']
569 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
571 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
570 bothincompletediverge)
572 bothincompletediverge)
571 elif dirtyc2:
573 elif dirtyc2:
572 assert not both1['incomplete']
574 assert not both1['incomplete']
573 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
575 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
574 bothincompletediverge)
576 bothincompletediverge)
575 else:
577 else:
576 # incomplete copies and divergences can't happen outside grafts
578 # incomplete copies and divergences can't happen outside grafts
577 assert not both1['incomplete']
579 assert not both1['incomplete']
578 assert not both2['incomplete']
580 assert not both2['incomplete']
579 assert not bothincompletediverge
581 assert not bothincompletediverge
580 for f in remainder:
582 for f in remainder:
581 assert f not in bothdiverge
583 assert f not in bothdiverge
582 ic = remainder[f]
584 ic = remainder[f]
583 if ic[0] in (m1 if dirtyc1 else m2):
585 if ic[0] in (m1 if dirtyc1 else m2):
584 # backed-out rename on one side, but watch out for deleted files
586 # backed-out rename on one side, but watch out for deleted files
585 bothdiverge[f] = ic
587 bothdiverge[f] = ic
586 for of, fl in bothdiverge.items():
588 for of, fl in bothdiverge.items():
587 if len(fl) == 2 and fl[0] == fl[1]:
589 if len(fl) == 2 and fl[0] == fl[1]:
588 copy[fl[0]] = of # not actually divergent, just matching renames
590 copy[fl[0]] = of # not actually divergent, just matching renames
589
591
590 if fullcopy and repo.ui.debugflag:
592 if fullcopy and repo.ui.debugflag:
591 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
593 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
592 "% = renamed and deleted):\n")
594 "% = renamed and deleted):\n")
593 for f in sorted(fullcopy):
595 for f in sorted(fullcopy):
594 note = ""
596 note = ""
595 if f in copy:
597 if f in copy:
596 note += "*"
598 note += "*"
597 if f in divergeset:
599 if f in divergeset:
598 note += "!"
600 note += "!"
599 if f in renamedeleteset:
601 if f in renamedeleteset:
600 note += "%"
602 note += "%"
601 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
603 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
602 note))
604 note))
603 del divergeset
605 del divergeset
604
606
605 if not fullcopy:
607 if not fullcopy:
606 return copy, {}, diverge, renamedelete, {}
608 return copy, {}, diverge, renamedelete, {}
607
609
608 repo.ui.debug(" checking for directory renames\n")
610 repo.ui.debug(" checking for directory renames\n")
609
611
610 # generate a directory move map
612 # generate a directory move map
611 d1, d2 = c1.dirs(), c2.dirs()
613 d1, d2 = c1.dirs(), c2.dirs()
612 # Hack for adding '', which is not otherwise added, to d1 and d2
614 # Hack for adding '', which is not otherwise added, to d1 and d2
613 d1.addpath('/')
615 d1.addpath('/')
614 d2.addpath('/')
616 d2.addpath('/')
615 invalid = set()
617 invalid = set()
616 dirmove = {}
618 dirmove = {}
617
619
618 # examine each file copy for a potential directory move, which is
620 # examine each file copy for a potential directory move, which is
619 # when all the files in a directory are moved to a new directory
621 # when all the files in a directory are moved to a new directory
620 for dst, src in fullcopy.iteritems():
622 for dst, src in fullcopy.iteritems():
621 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
623 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
622 if dsrc in invalid:
624 if dsrc in invalid:
623 # already seen to be uninteresting
625 # already seen to be uninteresting
624 continue
626 continue
625 elif dsrc in d1 and ddst in d1:
627 elif dsrc in d1 and ddst in d1:
626 # directory wasn't entirely moved locally
628 # directory wasn't entirely moved locally
627 invalid.add(dsrc)
629 invalid.add(dsrc)
628 elif dsrc in d2 and ddst in d2:
630 elif dsrc in d2 and ddst in d2:
629 # directory wasn't entirely moved remotely
631 # directory wasn't entirely moved remotely
630 invalid.add(dsrc)
632 invalid.add(dsrc)
631 elif dsrc in dirmove and dirmove[dsrc] != ddst:
633 elif dsrc in dirmove and dirmove[dsrc] != ddst:
632 # files from the same directory moved to two different places
634 # files from the same directory moved to two different places
633 invalid.add(dsrc)
635 invalid.add(dsrc)
634 else:
636 else:
635 # looks good so far
637 # looks good so far
636 dirmove[dsrc] = ddst
638 dirmove[dsrc] = ddst
637
639
638 for i in invalid:
640 for i in invalid:
639 if i in dirmove:
641 if i in dirmove:
640 del dirmove[i]
642 del dirmove[i]
641 del d1, d2, invalid
643 del d1, d2, invalid
642
644
643 if not dirmove:
645 if not dirmove:
644 return copy, {}, diverge, renamedelete, {}
646 return copy, {}, diverge, renamedelete, {}
645
647
646 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
648 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
647
649
648 for d in dirmove:
650 for d in dirmove:
649 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
651 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
650 (d, dirmove[d]))
652 (d, dirmove[d]))
651
653
652 movewithdir = {}
654 movewithdir = {}
653 # check unaccounted nonoverlapping files against directory moves
655 # check unaccounted nonoverlapping files against directory moves
654 for f in u1r + u2r:
656 for f in u1r + u2r:
655 if f not in fullcopy:
657 if f not in fullcopy:
656 for d in dirmove:
658 for d in dirmove:
657 if f.startswith(d):
659 if f.startswith(d):
658 # new file added in a directory that was moved, move it
660 # new file added in a directory that was moved, move it
659 df = dirmove[d] + f[len(d):]
661 df = dirmove[d] + f[len(d):]
660 if df not in copy:
662 if df not in copy:
661 movewithdir[f] = df
663 movewithdir[f] = df
662 repo.ui.debug((" pending file src: '%s' -> "
664 repo.ui.debug((" pending file src: '%s' -> "
663 "dst: '%s'\n") % (f, df))
665 "dst: '%s'\n") % (f, df))
664 break
666 break
665
667
666 return copy, movewithdir, diverge, renamedelete, dirmove
668 return copy, movewithdir, diverge, renamedelete, dirmove
667
669
668 def _heuristicscopytracing(repo, c1, c2, base):
670 def _heuristicscopytracing(repo, c1, c2, base):
669 """ Fast copytracing using filename heuristics
671 """ Fast copytracing using filename heuristics
670
672
671 Assumes that moves or renames are of following two types:
673 Assumes that moves or renames are of following two types:
672
674
673 1) Inside a directory only (same directory name but different filenames)
675 1) Inside a directory only (same directory name but different filenames)
674 2) Move from one directory to another
676 2) Move from one directory to another
675 (same filenames but different directory names)
677 (same filenames but different directory names)
676
678
677 Works only when there are no merge commits in the "source branch".
679 Works only when there are no merge commits in the "source branch".
678 Source branch is commits from base up to c2 not including base.
680 Source branch is commits from base up to c2 not including base.
679
681
680 If merge is involved it fallbacks to _fullcopytracing().
682 If merge is involved it fallbacks to _fullcopytracing().
681
683
682 Can be used by setting the following config:
684 Can be used by setting the following config:
683
685
684 [experimental]
686 [experimental]
685 copytrace = heuristics
687 copytrace = heuristics
686
688
687 In some cases the copy/move candidates found by heuristics can be very large
689 In some cases the copy/move candidates found by heuristics can be very large
688 in number and that will make the algorithm slow. The number of possible
690 in number and that will make the algorithm slow. The number of possible
689 candidates to check can be limited by using the config
691 candidates to check can be limited by using the config
690 `experimental.copytrace.movecandidateslimit` which defaults to 100.
692 `experimental.copytrace.movecandidateslimit` which defaults to 100.
691 """
693 """
692
694
693 if c1.rev() is None:
695 if c1.rev() is None:
694 c1 = c1.p1()
696 c1 = c1.p1()
695 if c2.rev() is None:
697 if c2.rev() is None:
696 c2 = c2.p1()
698 c2 = c2.p1()
697
699
698 copies = {}
700 copies = {}
699
701
700 changedfiles = set()
702 changedfiles = set()
701 m1 = c1.manifest()
703 m1 = c1.manifest()
702 if not repo.revs('%d::%d', base.rev(), c2.rev()):
704 if not repo.revs('%d::%d', base.rev(), c2.rev()):
703 # If base is not in c2 branch, we switch to fullcopytracing
705 # If base is not in c2 branch, we switch to fullcopytracing
704 repo.ui.debug("switching to full copytracing as base is not "
706 repo.ui.debug("switching to full copytracing as base is not "
705 "an ancestor of c2\n")
707 "an ancestor of c2\n")
706 return _fullcopytracing(repo, c1, c2, base)
708 return _fullcopytracing(repo, c1, c2, base)
707
709
708 ctx = c2
710 ctx = c2
709 while ctx != base:
711 while ctx != base:
710 if len(ctx.parents()) == 2:
712 if len(ctx.parents()) == 2:
711 # To keep things simple let's not handle merges
713 # To keep things simple let's not handle merges
712 repo.ui.debug("switching to full copytracing because of merges\n")
714 repo.ui.debug("switching to full copytracing because of merges\n")
713 return _fullcopytracing(repo, c1, c2, base)
715 return _fullcopytracing(repo, c1, c2, base)
714 changedfiles.update(ctx.files())
716 changedfiles.update(ctx.files())
715 ctx = ctx.p1()
717 ctx = ctx.p1()
716
718
717 cp = _forwardcopies(base, c2)
719 cp = _forwardcopies(base, c2)
718 for dst, src in cp.iteritems():
720 for dst, src in cp.iteritems():
719 if src in m1:
721 if src in m1:
720 copies[dst] = src
722 copies[dst] = src
721
723
722 # file is missing if it isn't present in the destination, but is present in
724 # file is missing if it isn't present in the destination, but is present in
723 # the base and present in the source.
725 # the base and present in the source.
724 # Presence in the base is important to exclude added files, presence in the
726 # Presence in the base is important to exclude added files, presence in the
725 # source is important to exclude removed files.
727 # source is important to exclude removed files.
726 filt = lambda f: f not in m1 and f in base and f in c2
728 filt = lambda f: f not in m1 and f in base and f in c2
727 missingfiles = [f for f in changedfiles if filt(f)]
729 missingfiles = [f for f in changedfiles if filt(f)]
728
730
729 if missingfiles:
731 if missingfiles:
730 basenametofilename = collections.defaultdict(list)
732 basenametofilename = collections.defaultdict(list)
731 dirnametofilename = collections.defaultdict(list)
733 dirnametofilename = collections.defaultdict(list)
732
734
733 for f in m1.filesnotin(base.manifest()):
735 for f in m1.filesnotin(base.manifest()):
734 basename = os.path.basename(f)
736 basename = os.path.basename(f)
735 dirname = os.path.dirname(f)
737 dirname = os.path.dirname(f)
736 basenametofilename[basename].append(f)
738 basenametofilename[basename].append(f)
737 dirnametofilename[dirname].append(f)
739 dirnametofilename[dirname].append(f)
738
740
739 for f in missingfiles:
741 for f in missingfiles:
740 basename = os.path.basename(f)
742 basename = os.path.basename(f)
741 dirname = os.path.dirname(f)
743 dirname = os.path.dirname(f)
742 samebasename = basenametofilename[basename]
744 samebasename = basenametofilename[basename]
743 samedirname = dirnametofilename[dirname]
745 samedirname = dirnametofilename[dirname]
744 movecandidates = samebasename + samedirname
746 movecandidates = samebasename + samedirname
745 # f is guaranteed to be present in c2, that's why
747 # f is guaranteed to be present in c2, that's why
746 # c2.filectx(f) won't fail
748 # c2.filectx(f) won't fail
747 f2 = c2.filectx(f)
749 f2 = c2.filectx(f)
748 # we can have a lot of candidates which can slow down the heuristics
750 # we can have a lot of candidates which can slow down the heuristics
749 # config value to limit the number of candidates moves to check
751 # config value to limit the number of candidates moves to check
750 maxcandidates = repo.ui.configint('experimental',
752 maxcandidates = repo.ui.configint('experimental',
751 'copytrace.movecandidateslimit')
753 'copytrace.movecandidateslimit')
752
754
753 if len(movecandidates) > maxcandidates:
755 if len(movecandidates) > maxcandidates:
754 repo.ui.status(_("skipping copytracing for '%s', more "
756 repo.ui.status(_("skipping copytracing for '%s', more "
755 "candidates than the limit: %d\n")
757 "candidates than the limit: %d\n")
756 % (f, len(movecandidates)))
758 % (f, len(movecandidates)))
757 continue
759 continue
758
760
759 for candidate in movecandidates:
761 for candidate in movecandidates:
760 f1 = c1.filectx(candidate)
762 f1 = c1.filectx(candidate)
761 if _related(f1, f2):
763 if _related(f1, f2):
762 # if there are a few related copies then we'll merge
764 # if there are a few related copies then we'll merge
763 # changes into all of them. This matches the behaviour
765 # changes into all of them. This matches the behaviour
764 # of upstream copytracing
766 # of upstream copytracing
765 copies[candidate] = f
767 copies[candidate] = f
766
768
767 return copies, {}, {}, {}, {}
769 return copies, {}, {}, {}, {}
768
770
769 def _related(f1, f2):
771 def _related(f1, f2):
770 """return True if f1 and f2 filectx have a common ancestor
772 """return True if f1 and f2 filectx have a common ancestor
771
773
772 Walk back to common ancestor to see if the two files originate
774 Walk back to common ancestor to see if the two files originate
773 from the same file. Since workingfilectx's rev() is None it messes
775 from the same file. Since workingfilectx's rev() is None it messes
774 up the integer comparison logic, hence the pre-step check for
776 up the integer comparison logic, hence the pre-step check for
775 None (f1 and f2 can only be workingfilectx's initially).
777 None (f1 and f2 can only be workingfilectx's initially).
776 """
778 """
777
779
778 if f1 == f2:
780 if f1 == f2:
779 return True # a match
781 return True # a match
780
782
781 g1, g2 = f1.ancestors(), f2.ancestors()
783 g1, g2 = f1.ancestors(), f2.ancestors()
782 try:
784 try:
783 f1r, f2r = f1.linkrev(), f2.linkrev()
785 f1r, f2r = f1.linkrev(), f2.linkrev()
784
786
785 if f1r is None:
787 if f1r is None:
786 f1 = next(g1)
788 f1 = next(g1)
787 if f2r is None:
789 if f2r is None:
788 f2 = next(g2)
790 f2 = next(g2)
789
791
790 while True:
792 while True:
791 f1r, f2r = f1.linkrev(), f2.linkrev()
793 f1r, f2r = f1.linkrev(), f2.linkrev()
792 if f1r > f2r:
794 if f1r > f2r:
793 f1 = next(g1)
795 f1 = next(g1)
794 elif f2r > f1r:
796 elif f2r > f1r:
795 f2 = next(g2)
797 f2 = next(g2)
796 else: # f1 and f2 point to files in the same linkrev
798 else: # f1 and f2 point to files in the same linkrev
797 return f1 == f2 # true if they point to the same file
799 return f1 == f2 # true if they point to the same file
798 except StopIteration:
800 except StopIteration:
799 return False
801 return False
800
802
801 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
803 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
802 """
804 """
803 check possible copies of f from msrc to mdst
805 check possible copies of f from msrc to mdst
804
806
805 srcctx = starting context for f in msrc
807 srcctx = starting context for f in msrc
806 dstctx = destination context for f in mdst
808 dstctx = destination context for f in mdst
807 f = the filename to check (as in msrc)
809 f = the filename to check (as in msrc)
808 base = the changectx used as a merge base
810 base = the changectx used as a merge base
809 tca = topological common ancestor for graft-like scenarios
811 tca = topological common ancestor for graft-like scenarios
810 remotebase = True if base is outside tca::srcctx, False otherwise
812 remotebase = True if base is outside tca::srcctx, False otherwise
811 limit = the rev number to not search beyond
813 limit = the rev number to not search beyond
812 data = dictionary of dictionary to store copy data. (see mergecopies)
814 data = dictionary of dictionary to store copy data. (see mergecopies)
813
815
814 note: limit is only an optimization, and provides no guarantee that
816 note: limit is only an optimization, and provides no guarantee that
815 irrelevant revisions will not be visited
817 irrelevant revisions will not be visited
816 there is no easy way to make this algorithm stop in a guaranteed way
818 there is no easy way to make this algorithm stop in a guaranteed way
817 once it "goes behind a certain revision".
819 once it "goes behind a certain revision".
818 """
820 """
819
821
820 msrc = srcctx.manifest()
822 msrc = srcctx.manifest()
821 mdst = dstctx.manifest()
823 mdst = dstctx.manifest()
822 mb = base.manifest()
824 mb = base.manifest()
823 mta = tca.manifest()
825 mta = tca.manifest()
824 # Might be true if this call is about finding backward renames,
826 # Might be true if this call is about finding backward renames,
825 # This happens in the case of grafts because the DAG is then rotated.
827 # This happens in the case of grafts because the DAG is then rotated.
826 # If the file exists in both the base and the source, we are not looking
828 # If the file exists in both the base and the source, we are not looking
827 # for a rename on the source side, but on the part of the DAG that is
829 # for a rename on the source side, but on the part of the DAG that is
828 # traversed backwards.
830 # traversed backwards.
829 #
831 #
830 # In the case there is both backward and forward renames (before and after
832 # In the case there is both backward and forward renames (before and after
831 # the base) this is more complicated as we must detect a divergence.
833 # the base) this is more complicated as we must detect a divergence.
832 # We use 'backwards = False' in that case.
834 # We use 'backwards = False' in that case.
833 backwards = not remotebase and base != tca and f in mb
835 backwards = not remotebase and base != tca and f in mb
834 getsrcfctx = _makegetfctx(srcctx)
836 getsrcfctx = _makegetfctx(srcctx)
835 getdstfctx = _makegetfctx(dstctx)
837 getdstfctx = _makegetfctx(dstctx)
836
838
837 if msrc[f] == mb.get(f) and not remotebase:
839 if msrc[f] == mb.get(f) and not remotebase:
838 # Nothing to merge
840 # Nothing to merge
839 return
841 return
840
842
841 of = None
843 of = None
842 seen = {f}
844 seen = {f}
843 for oc in getsrcfctx(f, msrc[f]).ancestors():
845 for oc in getsrcfctx(f, msrc[f]).ancestors():
844 of = oc.path()
846 of = oc.path()
845 if of in seen:
847 if of in seen:
846 # check limit late - grab last rename before
848 # check limit late - grab last rename before
847 if oc.linkrev() < limit:
849 if oc.linkrev() < limit:
848 break
850 break
849 continue
851 continue
850 seen.add(of)
852 seen.add(of)
851
853
852 # remember for dir rename detection
854 # remember for dir rename detection
853 if backwards:
855 if backwards:
854 data['fullcopy'][of] = f # grafting backwards through renames
856 data['fullcopy'][of] = f # grafting backwards through renames
855 else:
857 else:
856 data['fullcopy'][f] = of
858 data['fullcopy'][f] = of
857 if of not in mdst:
859 if of not in mdst:
858 continue # no match, keep looking
860 continue # no match, keep looking
859 if mdst[of] == mb.get(of):
861 if mdst[of] == mb.get(of):
860 return # no merge needed, quit early
862 return # no merge needed, quit early
861 c2 = getdstfctx(of, mdst[of])
863 c2 = getdstfctx(of, mdst[of])
862 # c2 might be a plain new file on added on destination side that is
864 # c2 might be a plain new file on added on destination side that is
863 # unrelated to the droids we are looking for.
865 # unrelated to the droids we are looking for.
864 cr = _related(oc, c2)
866 cr = _related(oc, c2)
865 if cr and (of == f or of == c2.path()): # non-divergent
867 if cr and (of == f or of == c2.path()): # non-divergent
866 if backwards:
868 if backwards:
867 data['copy'][of] = f
869 data['copy'][of] = f
868 elif of in mb:
870 elif of in mb:
869 data['copy'][f] = of
871 data['copy'][f] = of
870 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
872 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
871 data['copy'][of] = f
873 data['copy'][of] = f
872 del data['fullcopy'][f]
874 del data['fullcopy'][f]
873 data['fullcopy'][of] = f
875 data['fullcopy'][of] = f
874 else: # divergence w.r.t. graft CA on one side of topological CA
876 else: # divergence w.r.t. graft CA on one side of topological CA
875 for sf in seen:
877 for sf in seen:
876 if sf in mb:
878 if sf in mb:
877 assert sf not in data['diverge']
879 assert sf not in data['diverge']
878 data['diverge'][sf] = [f, of]
880 data['diverge'][sf] = [f, of]
879 break
881 break
880 return
882 return
881
883
882 if of in mta:
884 if of in mta:
883 if backwards or remotebase:
885 if backwards or remotebase:
884 data['incomplete'][of] = f
886 data['incomplete'][of] = f
885 else:
887 else:
886 for sf in seen:
888 for sf in seen:
887 if sf in mb:
889 if sf in mb:
888 if tca == base:
890 if tca == base:
889 data['diverge'].setdefault(sf, []).append(f)
891 data['diverge'].setdefault(sf, []).append(f)
890 else:
892 else:
891 data['incompletediverge'][sf] = [of, f]
893 data['incompletediverge'][sf] = [of, f]
892 return
894 return
893
895
894 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
896 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
895 """reproduce copies from fromrev to rev in the dirstate
897 """reproduce copies from fromrev to rev in the dirstate
896
898
897 If skiprev is specified, it's a revision that should be used to
899 If skiprev is specified, it's a revision that should be used to
898 filter copy records. Any copies that occur between fromrev and
900 filter copy records. Any copies that occur between fromrev and
899 skiprev will not be duplicated, even if they appear in the set of
901 skiprev will not be duplicated, even if they appear in the set of
900 copies between fromrev and rev.
902 copies between fromrev and rev.
901 """
903 """
902 exclude = {}
904 exclude = {}
903 ctraceconfig = repo.ui.config('experimental', 'copytrace')
905 ctraceconfig = repo.ui.config('experimental', 'copytrace')
904 bctrace = stringutil.parsebool(ctraceconfig)
906 bctrace = stringutil.parsebool(ctraceconfig)
905 if (skiprev is not None and
907 if (skiprev is not None and
906 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
908 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
907 # copytrace='off' skips this line, but not the entire function because
909 # copytrace='off' skips this line, but not the entire function because
908 # the line below is O(size of the repo) during a rebase, while the rest
910 # the line below is O(size of the repo) during a rebase, while the rest
909 # of the function is much faster (and is required for carrying copy
911 # of the function is much faster (and is required for carrying copy
910 # metadata across the rebase anyway).
912 # metadata across the rebase anyway).
911 exclude = pathcopies(repo[fromrev], repo[skiprev])
913 exclude = pathcopies(repo[fromrev], repo[skiprev])
912 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
914 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
913 # copies.pathcopies returns backward renames, so dst might not
915 # copies.pathcopies returns backward renames, so dst might not
914 # actually be in the dirstate
916 # actually be in the dirstate
915 if dst in exclude:
917 if dst in exclude:
916 continue
918 continue
917 wctx[dst].markcopied(src)
919 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now