##// END OF EJS Templates
narrow: fix copies._fullcopytracing() narrowspec filtering in graft case...
Martin von Zweigbergk -
r40489:07a66c13 stable
parent child Browse files
Show More
@@ -1,922 +1,923
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.
    """

    # Strategy: tag a and b with opposite "sides" (-1 / +1), then sweep the
    # graph from highest to lowest revision using a max-heap (negated revnums
    # in heapq's min-heap).  A parent inherits its child's side on first
    # visit; if children from both sides reach it, the parent is neutralized
    # (side 0) and we know a common ancestor exists.  We track how many
    # still-interesting (side-tagged) revisions remain and the lowest tagged
    # revision seen, and stop once nothing interesting is left.
    cl = repo.changelog
    working = len(cl)  # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    heap = [-a, -b]
    heapq.heapify(heap)
    pending = len(heap)
    seencommon = False
    lowest = working

    while pending:
        rev = -heapq.heappop(heap)
        if rev == working:
            parentrevs = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parentrevs = cl.parentrevs(rev)
        for parent in parentrevs:
            if parent < 0:
                continue
            if parent not in side:
                # first time we see this parent; it inherits the child's side
                side[parent] = side[rev]
                if side[parent]:
                    pending += 1
                heapq.heappush(heap, -parent)
            elif side[parent] and side[parent] != side[rev]:
                # parent was interesting but is reachable from both sides
                side[parent] = 0
                pending -= 1
                seencommon = True
        if side[rev]:
            lowest = rev  # lowest rev visited
            pending -= 1

    if not seencommon:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, a and b are revs 3 and 0, so the sweep
    # computes a limit of 2, yet the filelog has the copy information in rev 1
    # and we would not look back far enough unless we also consider a and b
    # themselves as candidates.  This only occurs when a is a descendent of b
    # or visa-versa, hence the min() below.
    return min(lowest, a, b)
111
111
112 def _chain(src, dst, a, b):
112 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
113 """chain two sets of copies a->b"""
114 t = a.copy()
114 t = a.copy()
115 for k, v in b.iteritems():
115 for k, v in b.iteritems():
116 if v in t:
116 if v in t:
117 # found a chain
117 # found a chain
118 if t[v] != k:
118 if t[v] != k:
119 # file wasn't renamed back to itself
119 # file wasn't renamed back to itself
120 t[k] = t[v]
120 t[k] = t[v]
121 if v not in dst:
121 if v not in dst:
122 # chain was a rename, not a copy
122 # chain was a rename, not a copy
123 del t[v]
123 del t[v]
124 if v in src:
124 if v in src:
125 # file is a copy of an existing file
125 # file is a copy of an existing file
126 t[k] = v
126 t[k] = v
127
127
128 # remove criss-crossed copies
128 # remove criss-crossed copies
129 for k, v in list(t.items()):
129 for k, v in list(t.items()):
130 if k in src and v in dst:
130 if k in src and v in dst:
131 del t[k]
131 del t[k]
132
132
133 return t
133 return t
134
134
135 def _tracefile(fctx, am, limit=-1):
135 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
136 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
137 manifest am, stopping after the first ancestor lower than limit"""
138
138
139 for f in fctx.ancestors():
139 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
140 if am.get(f.path(), None) == f.filenode():
141 return f
141 return f
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 return None
143 return None
144
144
145 def _dirstatecopies(d, match=None):
145 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
146 ds = d._repo.dirstate
147 c = ds.copies().copy()
147 c = ds.copies().copy()
148 for k in list(c):
148 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
149 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
150 del c[k]
151 return c
151 return c
152
152
153 def _computeforwardmissing(a, b, match=None):
153 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
154 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
155 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
156 files _forwardcopies is about to process.
157 """
157 """
158 ma = a.manifest()
158 ma = a.manifest()
159 mb = b.manifest()
159 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
160 return mb.filesnotin(ma, match=match)
161
161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests.  We can optimize it in the
    # common case of computing what copies are in a commit versus its parent
    # (like during a rebase or histedit).  Merge commits are excluded from
    # this optimization, since ctx.files() for a merge commit is not correct
    # for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            copies[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return copies
218
218
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)

        # copies up to the working copy's parent, then chain in whatever the
        # dirstate has recorded on top of that
        committed = _committedforwardcopies(a, b.p1(), match)
        return _chain(a, b, committed, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)
233
233
def _backwardrenames(a, b):
    """find {src@b: dst@a} rename mapping where b is an ancestor of a,
    expressed by inverting the forward copies and dropping plain copies"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames (sorted() makes the pick stable).
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(forward.iteritems()):
        if src in a:
            # source still exists in a: this was a copy, not a rename
            continue
        backward[src] = dst
    return backward
249
249
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: walk copies forward
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: walk renames backward
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # unrelated endpoints: go back to the common ancestor, then forward
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
272
272
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 and c2. This is its own function so extensions can easily wrap this call
275 and c2. This is its own function so extensions can easily wrap this call
276 to see what files mergecopies is about to process.
276 to see what files mergecopies is about to process.
277
277
278 Even though c1 and c2 are not used in this function, they are useful in
278 Even though c1 and c2 are not used in this function, they are useful in
279 other extensions for being able to read the file nodes of the changed files.
279 other extensions for being able to read the file nodes of the changed files.
280
280
281 "baselabel" can be passed to help distinguish the multiple computations
281 "baselabel" can be passed to help distinguish the multiple computations
282 done in the graft case.
282 done in the graft case.
283 """
283 """
284 u1 = sorted(addedinm1 - addedinm2)
284 u1 = sorted(addedinm1 - addedinm2)
285 u2 = sorted(addedinm2 - addedinm1)
285 u2 = sorted(addedinm2 - addedinm1)
286
286
287 header = " unmatched files in %s"
287 header = " unmatched files in %s"
288 if baselabel:
288 if baselabel:
289 header += ' (from %s)' % baselabel
289 header += ' (from %s)' % baselabel
290 if u1:
290 if u1:
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 if u2:
292 if u2:
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294
294
295 return u1, u2
295 return u1, u2
296
296
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build (and cache on ctx) the ancestry set used for linkrev
        # adjustment; for the working directory, start from its parents
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac

    def makectx(f, n):
        if n in node.wdirfilenodeids:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx

    return util.lrucachefunc(makectx)
331
331
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 """combine partial copy paths"""
333 """combine partial copy paths"""
334 remainder = {}
334 remainder = {}
335 for f in copyfrom:
335 for f in copyfrom:
336 if f in copyto:
336 if f in copyto:
337 finalcopy[copyto[f]] = copyfrom[f]
337 finalcopy[copyto[f]] = copyfrom[f]
338 del copyto[f]
338 del copyto[f]
339 for f in incompletediverge:
339 for f in incompletediverge:
340 assert f not in diverge
340 assert f not in diverge
341 ic = incompletediverge[f]
341 ic = incompletediverge[f]
342 if ic[0] in copyto:
342 if ic[0] in copyto:
343 diverge[f] = [copyto[ic[0]], ic[1]]
343 diverge[f] = [copyto[ic[0]], ic[1]]
344 else:
344 else:
345 remainder[f] = ic
345 remainder[f] = ic
346 return remainder
346 return remainder
347
347
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    if boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    return _fullcopytracing(repo, c1, c2, base)
420
420
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        # working directory: judge by its parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # a public endpoint means the range may be large; stay on heuristics
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
438
438
439 def _fullcopytracing(repo, c1, c2, base):
439 def _fullcopytracing(repo, c1, c2, base):
440 """ The full copytracing algorithm which finds all the new files that were
440 """ The full copytracing algorithm which finds all the new files that were
441 added from merge base up to the top commit and for each file it checks if
441 added from merge base up to the top commit and for each file it checks if
442 this file was copied from another file.
442 this file was copied from another file.
443
443
444 This is pretty slow when a lot of changesets are involved but will track all
444 This is pretty slow when a lot of changesets are involved but will track all
445 the copies.
445 the copies.
446 """
446 """
447 # In certain scenarios (e.g. graft, update or rebase), base can be
447 # In certain scenarios (e.g. graft, update or rebase), base can be
448 # overridden We still need to know a real common ancestor in this case We
448 # overridden We still need to know a real common ancestor in this case We
449 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
449 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
450 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
450 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
451 # caller may not know if the revision passed in lieu of the CA is a genuine
451 # caller may not know if the revision passed in lieu of the CA is a genuine
452 # common ancestor or not without explicitly checking it, it's better to
452 # common ancestor or not without explicitly checking it, it's better to
453 # determine that here.
453 # determine that here.
454 #
454 #
455 # base.isancestorof(wc) is False, work around that
455 # base.isancestorof(wc) is False, work around that
456 _c1 = c1.p1() if c1.rev() is None else c1
456 _c1 = c1.p1() if c1.rev() is None else c1
457 _c2 = c2.p1() if c2.rev() is None else c2
457 _c2 = c2.p1() if c2.rev() is None else c2
458 # an endpoint is "dirty" if it isn't a descendant of the merge base
458 # an endpoint is "dirty" if it isn't a descendant of the merge base
459 # if we have a dirty endpoint, we need to trigger graft logic, and also
459 # if we have a dirty endpoint, we need to trigger graft logic, and also
460 # keep track of which endpoint is dirty
460 # keep track of which endpoint is dirty
461 dirtyc1 = not base.isancestorof(_c1)
461 dirtyc1 = not base.isancestorof(_c1)
462 dirtyc2 = not base.isancestorof(_c2)
462 dirtyc2 = not base.isancestorof(_c2)
463 graft = dirtyc1 or dirtyc2
463 graft = dirtyc1 or dirtyc2
464 tca = base
464 tca = base
465 if graft:
465 if graft:
466 tca = _c1.ancestor(_c2)
466 tca = _c1.ancestor(_c2)
467
467
468 limit = _findlimit(repo, c1.rev(), c2.rev())
468 limit = _findlimit(repo, c1.rev(), c2.rev())
469 if limit is None:
469 if limit is None:
470 # no common ancestor, no copies
470 # no common ancestor, no copies
471 return {}, {}, {}, {}, {}
471 return {}, {}, {}, {}, {}
472 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
472 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
473
473
474 m1 = c1.manifest()
474 m1 = c1.manifest()
475 m2 = c2.manifest()
475 m2 = c2.manifest()
476 mb = base.manifest()
476 mb = base.manifest()
477
477
478 # gather data from _checkcopies:
478 # gather data from _checkcopies:
479 # - diverge = record all diverges in this dict
479 # - diverge = record all diverges in this dict
480 # - copy = record all non-divergent copies in this dict
480 # - copy = record all non-divergent copies in this dict
481 # - fullcopy = record all copies in this dict
481 # - fullcopy = record all copies in this dict
482 # - incomplete = record non-divergent partial copies here
482 # - incomplete = record non-divergent partial copies here
483 # - incompletediverge = record divergent partial copies here
483 # - incompletediverge = record divergent partial copies here
484 diverge = {} # divergence data is shared
484 diverge = {} # divergence data is shared
485 incompletediverge = {}
485 incompletediverge = {}
486 data1 = {'copy': {},
486 data1 = {'copy': {},
487 'fullcopy': {},
487 'fullcopy': {},
488 'incomplete': {},
488 'incomplete': {},
489 'diverge': diverge,
489 'diverge': diverge,
490 'incompletediverge': incompletediverge,
490 'incompletediverge': incompletediverge,
491 }
491 }
492 data2 = {'copy': {},
492 data2 = {'copy': {},
493 'fullcopy': {},
493 'fullcopy': {},
494 'incomplete': {},
494 'incomplete': {},
495 'diverge': diverge,
495 'diverge': diverge,
496 'incompletediverge': incompletediverge,
496 'incompletediverge': incompletediverge,
497 }
497 }
498
498
499 # find interesting file sets from manifests
499 # find interesting file sets from manifests
500 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
500 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
501 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
501 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
502 bothnew = sorted(addedinm1 & addedinm2)
502 bothnew = sorted(addedinm1 & addedinm2)
503 if tca == base:
503 if tca == base:
504 # unmatched file from base
504 # unmatched file from base
505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
505 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
506 u1u, u2u = u1r, u2r
506 u1u, u2u = u1r, u2r
507 else:
507 else:
508 # unmatched file from base (DAG rotation in the graft case)
508 # unmatched file from base (DAG rotation in the graft case)
509 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
509 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
510 baselabel='base')
510 baselabel='base')
511 # unmatched file from topological common ancestors (no DAG rotation)
511 # unmatched file from topological common ancestors (no DAG rotation)
512 # need to recompute this for directory move handling when grafting
512 # need to recompute this for directory move handling when grafting
513 mta = tca.manifest()
513 mta = tca.manifest()
514 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
514 u1u, u2u = _computenonoverlap(repo, c1, c2,
515 m2.filesnotin(mta),
515 m1.filesnotin(mta, repo.narrowmatch()),
516 m2.filesnotin(mta, repo.narrowmatch()),
516 baselabel='topological common ancestor')
517 baselabel='topological common ancestor')
517
518
518 for f in u1u:
519 for f in u1u:
519 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
520 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)
520
521
521 for f in u2u:
522 for f in u2u:
522 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
523 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)
523
524
524 copy = dict(data1['copy'])
525 copy = dict(data1['copy'])
525 copy.update(data2['copy'])
526 copy.update(data2['copy'])
526 fullcopy = dict(data1['fullcopy'])
527 fullcopy = dict(data1['fullcopy'])
527 fullcopy.update(data2['fullcopy'])
528 fullcopy.update(data2['fullcopy'])
528
529
529 if dirtyc1:
530 if dirtyc1:
530 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
531 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
531 incompletediverge)
532 incompletediverge)
532 else:
533 else:
533 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
534 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
534 incompletediverge)
535 incompletediverge)
535
536
536 renamedelete = {}
537 renamedelete = {}
537 renamedeleteset = set()
538 renamedeleteset = set()
538 divergeset = set()
539 divergeset = set()
539 for of, fl in list(diverge.items()):
540 for of, fl in list(diverge.items()):
540 if len(fl) == 1 or of in c1 or of in c2:
541 if len(fl) == 1 or of in c1 or of in c2:
541 del diverge[of] # not actually divergent, or not a rename
542 del diverge[of] # not actually divergent, or not a rename
542 if of not in c1 and of not in c2:
543 if of not in c1 and of not in c2:
543 # renamed on one side, deleted on the other side, but filter
544 # renamed on one side, deleted on the other side, but filter
544 # out files that have been renamed and then deleted
545 # out files that have been renamed and then deleted
545 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
546 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
546 renamedeleteset.update(fl) # reverse map for below
547 renamedeleteset.update(fl) # reverse map for below
547 else:
548 else:
548 divergeset.update(fl) # reverse map for below
549 divergeset.update(fl) # reverse map for below
549
550
550 if bothnew:
551 if bothnew:
551 repo.ui.debug(" unmatched files new in both:\n %s\n"
552 repo.ui.debug(" unmatched files new in both:\n %s\n"
552 % "\n ".join(bothnew))
553 % "\n ".join(bothnew))
553 bothdiverge = {}
554 bothdiverge = {}
554 bothincompletediverge = {}
555 bothincompletediverge = {}
555 remainder = {}
556 remainder = {}
556 both1 = {'copy': {},
557 both1 = {'copy': {},
557 'fullcopy': {},
558 'fullcopy': {},
558 'incomplete': {},
559 'incomplete': {},
559 'diverge': bothdiverge,
560 'diverge': bothdiverge,
560 'incompletediverge': bothincompletediverge
561 'incompletediverge': bothincompletediverge
561 }
562 }
562 both2 = {'copy': {},
563 both2 = {'copy': {},
563 'fullcopy': {},
564 'fullcopy': {},
564 'incomplete': {},
565 'incomplete': {},
565 'diverge': bothdiverge,
566 'diverge': bothdiverge,
566 'incompletediverge': bothincompletediverge
567 'incompletediverge': bothincompletediverge
567 }
568 }
568 for f in bothnew:
569 for f in bothnew:
569 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
570 _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
570 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
571 _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
571 if dirtyc1:
572 if dirtyc1:
572 # incomplete copies may only be found on the "dirty" side for bothnew
573 # incomplete copies may only be found on the "dirty" side for bothnew
573 assert not both2['incomplete']
574 assert not both2['incomplete']
574 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
575 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
575 bothincompletediverge)
576 bothincompletediverge)
576 elif dirtyc2:
577 elif dirtyc2:
577 assert not both1['incomplete']
578 assert not both1['incomplete']
578 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
579 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
579 bothincompletediverge)
580 bothincompletediverge)
580 else:
581 else:
581 # incomplete copies and divergences can't happen outside grafts
582 # incomplete copies and divergences can't happen outside grafts
582 assert not both1['incomplete']
583 assert not both1['incomplete']
583 assert not both2['incomplete']
584 assert not both2['incomplete']
584 assert not bothincompletediverge
585 assert not bothincompletediverge
585 for f in remainder:
586 for f in remainder:
586 assert f not in bothdiverge
587 assert f not in bothdiverge
587 ic = remainder[f]
588 ic = remainder[f]
588 if ic[0] in (m1 if dirtyc1 else m2):
589 if ic[0] in (m1 if dirtyc1 else m2):
589 # backed-out rename on one side, but watch out for deleted files
590 # backed-out rename on one side, but watch out for deleted files
590 bothdiverge[f] = ic
591 bothdiverge[f] = ic
591 for of, fl in bothdiverge.items():
592 for of, fl in bothdiverge.items():
592 if len(fl) == 2 and fl[0] == fl[1]:
593 if len(fl) == 2 and fl[0] == fl[1]:
593 copy[fl[0]] = of # not actually divergent, just matching renames
594 copy[fl[0]] = of # not actually divergent, just matching renames
594
595
595 if fullcopy and repo.ui.debugflag:
596 if fullcopy and repo.ui.debugflag:
596 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
597 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
597 "% = renamed and deleted):\n")
598 "% = renamed and deleted):\n")
598 for f in sorted(fullcopy):
599 for f in sorted(fullcopy):
599 note = ""
600 note = ""
600 if f in copy:
601 if f in copy:
601 note += "*"
602 note += "*"
602 if f in divergeset:
603 if f in divergeset:
603 note += "!"
604 note += "!"
604 if f in renamedeleteset:
605 if f in renamedeleteset:
605 note += "%"
606 note += "%"
606 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
607 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
607 note))
608 note))
608 del divergeset
609 del divergeset
609
610
610 if not fullcopy:
611 if not fullcopy:
611 return copy, {}, diverge, renamedelete, {}
612 return copy, {}, diverge, renamedelete, {}
612
613
613 repo.ui.debug(" checking for directory renames\n")
614 repo.ui.debug(" checking for directory renames\n")
614
615
615 # generate a directory move map
616 # generate a directory move map
616 d1, d2 = c1.dirs(), c2.dirs()
617 d1, d2 = c1.dirs(), c2.dirs()
617 # Hack for adding '', which is not otherwise added, to d1 and d2
618 # Hack for adding '', which is not otherwise added, to d1 and d2
618 d1.addpath('/')
619 d1.addpath('/')
619 d2.addpath('/')
620 d2.addpath('/')
620 invalid = set()
621 invalid = set()
621 dirmove = {}
622 dirmove = {}
622
623
623 # examine each file copy for a potential directory move, which is
624 # examine each file copy for a potential directory move, which is
624 # when all the files in a directory are moved to a new directory
625 # when all the files in a directory are moved to a new directory
625 for dst, src in fullcopy.iteritems():
626 for dst, src in fullcopy.iteritems():
626 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
627 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
627 if dsrc in invalid:
628 if dsrc in invalid:
628 # already seen to be uninteresting
629 # already seen to be uninteresting
629 continue
630 continue
630 elif dsrc in d1 and ddst in d1:
631 elif dsrc in d1 and ddst in d1:
631 # directory wasn't entirely moved locally
632 # directory wasn't entirely moved locally
632 invalid.add(dsrc)
633 invalid.add(dsrc)
633 elif dsrc in d2 and ddst in d2:
634 elif dsrc in d2 and ddst in d2:
634 # directory wasn't entirely moved remotely
635 # directory wasn't entirely moved remotely
635 invalid.add(dsrc)
636 invalid.add(dsrc)
636 elif dsrc in dirmove and dirmove[dsrc] != ddst:
637 elif dsrc in dirmove and dirmove[dsrc] != ddst:
637 # files from the same directory moved to two different places
638 # files from the same directory moved to two different places
638 invalid.add(dsrc)
639 invalid.add(dsrc)
639 else:
640 else:
640 # looks good so far
641 # looks good so far
641 dirmove[dsrc] = ddst
642 dirmove[dsrc] = ddst
642
643
643 for i in invalid:
644 for i in invalid:
644 if i in dirmove:
645 if i in dirmove:
645 del dirmove[i]
646 del dirmove[i]
646 del d1, d2, invalid
647 del d1, d2, invalid
647
648
648 if not dirmove:
649 if not dirmove:
649 return copy, {}, diverge, renamedelete, {}
650 return copy, {}, diverge, renamedelete, {}
650
651
651 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
652 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
652
653
653 for d in dirmove:
654 for d in dirmove:
654 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
655 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
655 (d, dirmove[d]))
656 (d, dirmove[d]))
656
657
657 movewithdir = {}
658 movewithdir = {}
658 # check unaccounted nonoverlapping files against directory moves
659 # check unaccounted nonoverlapping files against directory moves
659 for f in u1r + u2r:
660 for f in u1r + u2r:
660 if f not in fullcopy:
661 if f not in fullcopy:
661 for d in dirmove:
662 for d in dirmove:
662 if f.startswith(d):
663 if f.startswith(d):
663 # new file added in a directory that was moved, move it
664 # new file added in a directory that was moved, move it
664 df = dirmove[d] + f[len(d):]
665 df = dirmove[d] + f[len(d):]
665 if df not in copy:
666 if df not in copy:
666 movewithdir[f] = df
667 movewithdir[f] = df
667 repo.ui.debug((" pending file src: '%s' -> "
668 repo.ui.debug((" pending file src: '%s' -> "
668 "dst: '%s'\n") % (f, df))
669 "dst: '%s'\n") % (f, df))
669 break
670 break
670
671
671 return copy, movewithdir, diverge, renamedelete, dirmove
672 return copy, movewithdir, diverge, renamedelete, dirmove
672
673
673 def _heuristicscopytracing(repo, c1, c2, base):
674 def _heuristicscopytracing(repo, c1, c2, base):
674 """ Fast copytracing using filename heuristics
675 """ Fast copytracing using filename heuristics
675
676
676 Assumes that moves or renames are of following two types:
677 Assumes that moves or renames are of following two types:
677
678
678 1) Inside a directory only (same directory name but different filenames)
679 1) Inside a directory only (same directory name but different filenames)
679 2) Move from one directory to another
680 2) Move from one directory to another
680 (same filenames but different directory names)
681 (same filenames but different directory names)
681
682
682 Works only when there are no merge commits in the "source branch".
683 Works only when there are no merge commits in the "source branch".
683 Source branch is commits from base up to c2 not including base.
684 Source branch is commits from base up to c2 not including base.
684
685
685 If merge is involved it fallbacks to _fullcopytracing().
686 If merge is involved it fallbacks to _fullcopytracing().
686
687
687 Can be used by setting the following config:
688 Can be used by setting the following config:
688
689
689 [experimental]
690 [experimental]
690 copytrace = heuristics
691 copytrace = heuristics
691
692
692 In some cases the copy/move candidates found by heuristics can be very large
693 In some cases the copy/move candidates found by heuristics can be very large
693 in number and that will make the algorithm slow. The number of possible
694 in number and that will make the algorithm slow. The number of possible
694 candidates to check can be limited by using the config
695 candidates to check can be limited by using the config
695 `experimental.copytrace.movecandidateslimit` which defaults to 100.
696 `experimental.copytrace.movecandidateslimit` which defaults to 100.
696 """
697 """
697
698
698 if c1.rev() is None:
699 if c1.rev() is None:
699 c1 = c1.p1()
700 c1 = c1.p1()
700 if c2.rev() is None:
701 if c2.rev() is None:
701 c2 = c2.p1()
702 c2 = c2.p1()
702
703
703 copies = {}
704 copies = {}
704
705
705 changedfiles = set()
706 changedfiles = set()
706 m1 = c1.manifest()
707 m1 = c1.manifest()
707 if not repo.revs('%d::%d', base.rev(), c2.rev()):
708 if not repo.revs('%d::%d', base.rev(), c2.rev()):
708 # If base is not in c2 branch, we switch to fullcopytracing
709 # If base is not in c2 branch, we switch to fullcopytracing
709 repo.ui.debug("switching to full copytracing as base is not "
710 repo.ui.debug("switching to full copytracing as base is not "
710 "an ancestor of c2\n")
711 "an ancestor of c2\n")
711 return _fullcopytracing(repo, c1, c2, base)
712 return _fullcopytracing(repo, c1, c2, base)
712
713
713 ctx = c2
714 ctx = c2
714 while ctx != base:
715 while ctx != base:
715 if len(ctx.parents()) == 2:
716 if len(ctx.parents()) == 2:
716 # To keep things simple let's not handle merges
717 # To keep things simple let's not handle merges
717 repo.ui.debug("switching to full copytracing because of merges\n")
718 repo.ui.debug("switching to full copytracing because of merges\n")
718 return _fullcopytracing(repo, c1, c2, base)
719 return _fullcopytracing(repo, c1, c2, base)
719 changedfiles.update(ctx.files())
720 changedfiles.update(ctx.files())
720 ctx = ctx.p1()
721 ctx = ctx.p1()
721
722
722 cp = _forwardcopies(base, c2)
723 cp = _forwardcopies(base, c2)
723 for dst, src in cp.iteritems():
724 for dst, src in cp.iteritems():
724 if src in m1:
725 if src in m1:
725 copies[dst] = src
726 copies[dst] = src
726
727
727 # file is missing if it isn't present in the destination, but is present in
728 # file is missing if it isn't present in the destination, but is present in
728 # the base and present in the source.
729 # the base and present in the source.
729 # Presence in the base is important to exclude added files, presence in the
730 # Presence in the base is important to exclude added files, presence in the
730 # source is important to exclude removed files.
731 # source is important to exclude removed files.
731 filt = lambda f: f not in m1 and f in base and f in c2
732 filt = lambda f: f not in m1 and f in base and f in c2
732 missingfiles = [f for f in changedfiles if filt(f)]
733 missingfiles = [f for f in changedfiles if filt(f)]
733
734
734 if missingfiles:
735 if missingfiles:
735 basenametofilename = collections.defaultdict(list)
736 basenametofilename = collections.defaultdict(list)
736 dirnametofilename = collections.defaultdict(list)
737 dirnametofilename = collections.defaultdict(list)
737
738
738 for f in m1.filesnotin(base.manifest()):
739 for f in m1.filesnotin(base.manifest()):
739 basename = os.path.basename(f)
740 basename = os.path.basename(f)
740 dirname = os.path.dirname(f)
741 dirname = os.path.dirname(f)
741 basenametofilename[basename].append(f)
742 basenametofilename[basename].append(f)
742 dirnametofilename[dirname].append(f)
743 dirnametofilename[dirname].append(f)
743
744
744 for f in missingfiles:
745 for f in missingfiles:
745 basename = os.path.basename(f)
746 basename = os.path.basename(f)
746 dirname = os.path.dirname(f)
747 dirname = os.path.dirname(f)
747 samebasename = basenametofilename[basename]
748 samebasename = basenametofilename[basename]
748 samedirname = dirnametofilename[dirname]
749 samedirname = dirnametofilename[dirname]
749 movecandidates = samebasename + samedirname
750 movecandidates = samebasename + samedirname
750 # f is guaranteed to be present in c2, that's why
751 # f is guaranteed to be present in c2, that's why
751 # c2.filectx(f) won't fail
752 # c2.filectx(f) won't fail
752 f2 = c2.filectx(f)
753 f2 = c2.filectx(f)
753 # we can have a lot of candidates which can slow down the heuristics
754 # we can have a lot of candidates which can slow down the heuristics
754 # config value to limit the number of candidates moves to check
755 # config value to limit the number of candidates moves to check
755 maxcandidates = repo.ui.configint('experimental',
756 maxcandidates = repo.ui.configint('experimental',
756 'copytrace.movecandidateslimit')
757 'copytrace.movecandidateslimit')
757
758
758 if len(movecandidates) > maxcandidates:
759 if len(movecandidates) > maxcandidates:
759 repo.ui.status(_("skipping copytracing for '%s', more "
760 repo.ui.status(_("skipping copytracing for '%s', more "
760 "candidates than the limit: %d\n")
761 "candidates than the limit: %d\n")
761 % (f, len(movecandidates)))
762 % (f, len(movecandidates)))
762 continue
763 continue
763
764
764 for candidate in movecandidates:
765 for candidate in movecandidates:
765 f1 = c1.filectx(candidate)
766 f1 = c1.filectx(candidate)
766 if _related(f1, f2):
767 if _related(f1, f2):
767 # if there are a few related copies then we'll merge
768 # if there are a few related copies then we'll merge
768 # changes into all of them. This matches the behaviour
769 # changes into all of them. This matches the behaviour
769 # of upstream copytracing
770 # of upstream copytracing
770 copies[candidate] = f
771 copies[candidate] = f
771
772
772 return copies, {}, {}, {}, {}
773 return copies, {}, {}, {}, {}
773
774
774 def _related(f1, f2):
775 def _related(f1, f2):
775 """return True if f1 and f2 filectx have a common ancestor
776 """return True if f1 and f2 filectx have a common ancestor
776
777
777 Walk back to common ancestor to see if the two files originate
778 Walk back to common ancestor to see if the two files originate
778 from the same file. Since workingfilectx's rev() is None it messes
779 from the same file. Since workingfilectx's rev() is None it messes
779 up the integer comparison logic, hence the pre-step check for
780 up the integer comparison logic, hence the pre-step check for
780 None (f1 and f2 can only be workingfilectx's initially).
781 None (f1 and f2 can only be workingfilectx's initially).
781 """
782 """
782
783
783 if f1 == f2:
784 if f1 == f2:
784 return f1 # a match
785 return f1 # a match
785
786
786 g1, g2 = f1.ancestors(), f2.ancestors()
787 g1, g2 = f1.ancestors(), f2.ancestors()
787 try:
788 try:
788 f1r, f2r = f1.linkrev(), f2.linkrev()
789 f1r, f2r = f1.linkrev(), f2.linkrev()
789
790
790 if f1r is None:
791 if f1r is None:
791 f1 = next(g1)
792 f1 = next(g1)
792 if f2r is None:
793 if f2r is None:
793 f2 = next(g2)
794 f2 = next(g2)
794
795
795 while True:
796 while True:
796 f1r, f2r = f1.linkrev(), f2.linkrev()
797 f1r, f2r = f1.linkrev(), f2.linkrev()
797 if f1r > f2r:
798 if f1r > f2r:
798 f1 = next(g1)
799 f1 = next(g1)
799 elif f2r > f1r:
800 elif f2r > f1r:
800 f2 = next(g2)
801 f2 = next(g2)
801 else: # f1 and f2 point to files in the same linkrev
802 else: # f1 and f2 point to files in the same linkrev
802 return f1 == f2 # true if they point to the same file
803 return f1 == f2 # true if they point to the same file
803 except StopIteration:
804 except StopIteration:
804 return False
805 return False
805
806
806 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
807 def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
807 """
808 """
808 check possible copies of f from msrc to mdst
809 check possible copies of f from msrc to mdst
809
810
810 srcctx = starting context for f in msrc
811 srcctx = starting context for f in msrc
811 dstctx = destination context for f in mdst
812 dstctx = destination context for f in mdst
812 f = the filename to check (as in msrc)
813 f = the filename to check (as in msrc)
813 base = the changectx used as a merge base
814 base = the changectx used as a merge base
814 tca = topological common ancestor for graft-like scenarios
815 tca = topological common ancestor for graft-like scenarios
815 remotebase = True if base is outside tca::srcctx, False otherwise
816 remotebase = True if base is outside tca::srcctx, False otherwise
816 limit = the rev number to not search beyond
817 limit = the rev number to not search beyond
817 data = dictionary of dictionary to store copy data. (see mergecopies)
818 data = dictionary of dictionary to store copy data. (see mergecopies)
818
819
819 note: limit is only an optimization, and provides no guarantee that
820 note: limit is only an optimization, and provides no guarantee that
820 irrelevant revisions will not be visited
821 irrelevant revisions will not be visited
821 there is no easy way to make this algorithm stop in a guaranteed way
822 there is no easy way to make this algorithm stop in a guaranteed way
822 once it "goes behind a certain revision".
823 once it "goes behind a certain revision".
823 """
824 """
824
825
825 msrc = srcctx.manifest()
826 msrc = srcctx.manifest()
826 mdst = dstctx.manifest()
827 mdst = dstctx.manifest()
827 mb = base.manifest()
828 mb = base.manifest()
828 mta = tca.manifest()
829 mta = tca.manifest()
829 # Might be true if this call is about finding backward renames,
830 # Might be true if this call is about finding backward renames,
830 # This happens in the case of grafts because the DAG is then rotated.
831 # This happens in the case of grafts because the DAG is then rotated.
831 # If the file exists in both the base and the source, we are not looking
832 # If the file exists in both the base and the source, we are not looking
832 # for a rename on the source side, but on the part of the DAG that is
833 # for a rename on the source side, but on the part of the DAG that is
833 # traversed backwards.
834 # traversed backwards.
834 #
835 #
835 # In the case there is both backward and forward renames (before and after
836 # In the case there is both backward and forward renames (before and after
836 # the base) this is more complicated as we must detect a divergence.
837 # the base) this is more complicated as we must detect a divergence.
837 # We use 'backwards = False' in that case.
838 # We use 'backwards = False' in that case.
838 backwards = not remotebase and base != tca and f in mb
839 backwards = not remotebase and base != tca and f in mb
839 getsrcfctx = _makegetfctx(srcctx)
840 getsrcfctx = _makegetfctx(srcctx)
840 getdstfctx = _makegetfctx(dstctx)
841 getdstfctx = _makegetfctx(dstctx)
841
842
842 if msrc[f] == mb.get(f) and not remotebase:
843 if msrc[f] == mb.get(f) and not remotebase:
843 # Nothing to merge
844 # Nothing to merge
844 return
845 return
845
846
846 of = None
847 of = None
847 seen = {f}
848 seen = {f}
848 for oc in getsrcfctx(f, msrc[f]).ancestors():
849 for oc in getsrcfctx(f, msrc[f]).ancestors():
849 of = oc.path()
850 of = oc.path()
850 if of in seen:
851 if of in seen:
851 # check limit late - grab last rename before
852 # check limit late - grab last rename before
852 if oc.linkrev() < limit:
853 if oc.linkrev() < limit:
853 break
854 break
854 continue
855 continue
855 seen.add(of)
856 seen.add(of)
856
857
857 # remember for dir rename detection
858 # remember for dir rename detection
858 if backwards:
859 if backwards:
859 data['fullcopy'][of] = f # grafting backwards through renames
860 data['fullcopy'][of] = f # grafting backwards through renames
860 else:
861 else:
861 data['fullcopy'][f] = of
862 data['fullcopy'][f] = of
862 if of not in mdst:
863 if of not in mdst:
863 continue # no match, keep looking
864 continue # no match, keep looking
864 if mdst[of] == mb.get(of):
865 if mdst[of] == mb.get(of):
865 return # no merge needed, quit early
866 return # no merge needed, quit early
866 c2 = getdstfctx(of, mdst[of])
867 c2 = getdstfctx(of, mdst[of])
867 # c2 might be a plain new file on added on destination side that is
868 # c2 might be a plain new file on added on destination side that is
868 # unrelated to the droids we are looking for.
869 # unrelated to the droids we are looking for.
869 cr = _related(oc, c2)
870 cr = _related(oc, c2)
870 if cr and (of == f or of == c2.path()): # non-divergent
871 if cr and (of == f or of == c2.path()): # non-divergent
871 if backwards:
872 if backwards:
872 data['copy'][of] = f
873 data['copy'][of] = f
873 elif of in mb:
874 elif of in mb:
874 data['copy'][f] = of
875 data['copy'][f] = of
875 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
876 elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
876 data['copy'][of] = f
877 data['copy'][of] = f
877 del data['fullcopy'][f]
878 del data['fullcopy'][f]
878 data['fullcopy'][of] = f
879 data['fullcopy'][of] = f
879 else: # divergence w.r.t. graft CA on one side of topological CA
880 else: # divergence w.r.t. graft CA on one side of topological CA
880 for sf in seen:
881 for sf in seen:
881 if sf in mb:
882 if sf in mb:
882 assert sf not in data['diverge']
883 assert sf not in data['diverge']
883 data['diverge'][sf] = [f, of]
884 data['diverge'][sf] = [f, of]
884 break
885 break
885 return
886 return
886
887
887 if of in mta:
888 if of in mta:
888 if backwards or remotebase:
889 if backwards or remotebase:
889 data['incomplete'][of] = f
890 data['incomplete'][of] = f
890 else:
891 else:
891 for sf in seen:
892 for sf in seen:
892 if sf in mb:
893 if sf in mb:
893 if tca == base:
894 if tca == base:
894 data['diverge'].setdefault(sf, []).append(f)
895 data['diverge'].setdefault(sf, []).append(f)
895 else:
896 else:
896 data['incompletediverge'][sf] = [of, f]
897 data['incompletediverge'][sf] = [of, f]
897 return
898 return
898
899
899 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
900 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
900 """reproduce copies from fromrev to rev in the dirstate
901 """reproduce copies from fromrev to rev in the dirstate
901
902
902 If skiprev is specified, it's a revision that should be used to
903 If skiprev is specified, it's a revision that should be used to
903 filter copy records. Any copies that occur between fromrev and
904 filter copy records. Any copies that occur between fromrev and
904 skiprev will not be duplicated, even if they appear in the set of
905 skiprev will not be duplicated, even if they appear in the set of
905 copies between fromrev and rev.
906 copies between fromrev and rev.
906 """
907 """
907 exclude = {}
908 exclude = {}
908 ctraceconfig = repo.ui.config('experimental', 'copytrace')
909 ctraceconfig = repo.ui.config('experimental', 'copytrace')
909 bctrace = stringutil.parsebool(ctraceconfig)
910 bctrace = stringutil.parsebool(ctraceconfig)
910 if (skiprev is not None and
911 if (skiprev is not None and
911 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
912 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
912 # copytrace='off' skips this line, but not the entire function because
913 # copytrace='off' skips this line, but not the entire function because
913 # the line below is O(size of the repo) during a rebase, while the rest
914 # the line below is O(size of the repo) during a rebase, while the rest
914 # of the function is much faster (and is required for carrying copy
915 # of the function is much faster (and is required for carrying copy
915 # metadata across the rebase anyway).
916 # metadata across the rebase anyway).
916 exclude = pathcopies(repo[fromrev], repo[skiprev])
917 exclude = pathcopies(repo[fromrev], repo[skiprev])
917 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
918 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
918 # copies.pathcopies returns backward renames, so dst might not
919 # copies.pathcopies returns backward renames, so dst might not
919 # actually be in the dirstate
920 # actually be in the dirstate
920 if dst in exclude:
921 if dst in exclude:
921 continue
922 continue
922 wctx[dst].markcopied(src)
923 wctx[dst].markcopied(src)
@@ -1,75 +1,74
1
1
2 $ . "$TESTDIR/narrow-library.sh"
2 $ . "$TESTDIR/narrow-library.sh"
3
3
4 create full repo
4 create full repo
5
5
6 $ hg init master
6 $ hg init master
7 $ cd master
7 $ cd master
8
8
9 $ mkdir inside
9 $ mkdir inside
10 $ echo inside > inside/f1
10 $ echo inside > inside/f1
11 $ mkdir outside
11 $ mkdir outside
12 $ echo outside > outside/f2
12 $ echo outside > outside/f2
13 $ hg ci -Aqm 'initial'
13 $ hg ci -Aqm 'initial'
14
14
15 $ hg mv outside/f2 inside/f2
15 $ hg mv outside/f2 inside/f2
16 $ hg ci -qm 'move f2 from outside'
16 $ hg ci -qm 'move f2 from outside'
17
17
18 $ echo modified > inside/f2
18 $ echo modified > inside/f2
19 $ hg ci -qm 'modify inside/f2'
19 $ hg ci -qm 'modify inside/f2'
20
20
21 $ mkdir outside
21 $ mkdir outside
22 $ echo new > outside/f3
22 $ echo new > outside/f3
23 $ hg ci -Aqm 'add outside/f3'
23 $ hg ci -Aqm 'add outside/f3'
24 $ cd ..
24 $ cd ..
25
25
26 $ hg clone --narrow ssh://user@dummy/master narrow --include inside -r 2
26 $ hg clone --narrow ssh://user@dummy/master narrow --include inside -r 2
27 adding changesets
27 adding changesets
28 adding manifests
28 adding manifests
29 adding file changes
29 adding file changes
30 added 3 changesets with 3 changes to 2 files
30 added 3 changesets with 3 changes to 2 files
31 new changesets *:* (glob)
31 new changesets *:* (glob)
32 updating to branch default
32 updating to branch default
33 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
33 2 files updated, 0 files merged, 0 files removed, 0 files unresolved
34 $ cd narrow
34 $ cd narrow
35
35
36 $ hg co 'desc("move f2")'
36 $ hg co 'desc("move f2")'
37 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
37 1 files updated, 0 files merged, 0 files removed, 0 files unresolved
38 $ hg status
38 $ hg status
39 $ hg diff
39 $ hg diff
40 $ hg diff --change . --git
40 $ hg diff --change . --git
41 diff --git a/inside/f2 b/inside/f2
41 diff --git a/inside/f2 b/inside/f2
42 new file mode 100644
42 new file mode 100644
43 --- /dev/null
43 --- /dev/null
44 +++ b/inside/f2
44 +++ b/inside/f2
45 @@ -0,0 +1,1 @@
45 @@ -0,0 +1,1 @@
46 +outside
46 +outside
47
47
48 $ hg log --follow inside/f2 -r tip
48 $ hg log --follow inside/f2 -r tip
49 changeset: 2:bcfb756e0ca9
49 changeset: 2:bcfb756e0ca9
50 tag: tip
50 tag: tip
51 user: test
51 user: test
52 date: Thu Jan 01 00:00:00 1970 +0000
52 date: Thu Jan 01 00:00:00 1970 +0000
53 summary: modify inside/f2
53 summary: modify inside/f2
54
54
55 changeset: 1:5a016133b2bb
55 changeset: 1:5a016133b2bb
56 user: test
56 user: test
57 date: Thu Jan 01 00:00:00 1970 +0000
57 date: Thu Jan 01 00:00:00 1970 +0000
58 summary: move f2 from outside
58 summary: move f2 from outside
59
59
60 $ echo new > inside/f4
60 $ echo new > inside/f4
61 $ hg ci -Aqm 'add inside/f4'
61 $ hg ci -Aqm 'add inside/f4'
62 $ hg pull -q
62 $ hg pull -q
63 $ hg --config extensions.rebase= rebase -d tip
63 $ hg --config extensions.rebase= rebase -d tip
64 rebasing 3:4f84b666728c "add inside/f4"
64 rebasing 3:4f84b666728c "add inside/f4"
65 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/4f84b666728c-4269b76e-rebase.hg
65 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/4f84b666728c-4269b76e-rebase.hg
66
66
67 $ hg co -q 0
67 $ hg co -q 0
68 $ echo modified > inside/f1
68 $ echo modified > inside/f1
69 $ hg ci -qm 'modify inside/f1'
69 $ hg ci -qm 'modify inside/f1'
70 $ echo new > inside/f5
70 $ echo new > inside/f5
71 $ hg ci -Aqm 'add inside/f5'
71 $ hg ci -Aqm 'add inside/f5'
72 $ hg --config extensions.rebase= rebase -d 'public()' -r .
72 $ hg --config extensions.rebase= rebase -d 'public()' -r .
73 rebasing 6:610b60178c28 "add inside/f5" (tip)
73 rebasing 6:610b60178c28 "add inside/f5" (tip)
74 abort: data/outside/f3.i@54e53435331b: no match found!
74 saved backup bundle to $TESTTMP/narrow/.hg/strip-backup/610b60178c28-65716a78-rebase.hg
75 [255]
General Comments 0
You need to be logged in to leave comments. Login now