##// END OF EJS Templates
py3: use '%f' for floats instead of '%s'...
Pulkit Goyal -
r40112:2e9378f6 default
parent child Browse files
Show More
@@ -1,921 +1,921 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; None stands for the working directory.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    # revisions are negated so the min-heap pops the *highest* rev first,
    # giving us a topological (newest-to-oldest) walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # pseudo rev: parents come from the dirstate, not the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
111
111
112 def _chain(src, dst, a, b):
112 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
113 """chain two sets of copies a->b"""
114 t = a.copy()
114 t = a.copy()
115 for k, v in b.iteritems():
115 for k, v in b.iteritems():
116 if v in t:
116 if v in t:
117 # found a chain
117 # found a chain
118 if t[v] != k:
118 if t[v] != k:
119 # file wasn't renamed back to itself
119 # file wasn't renamed back to itself
120 t[k] = t[v]
120 t[k] = t[v]
121 if v not in dst:
121 if v not in dst:
122 # chain was a rename, not a copy
122 # chain was a rename, not a copy
123 del t[v]
123 del t[v]
124 if v in src:
124 if v in src:
125 # file is a copy of an existing file
125 # file is a copy of an existing file
126 t[k] = v
126 t[k] = v
127
127
128 # remove criss-crossed copies
128 # remove criss-crossed copies
129 for k, v in list(t.items()):
129 for k, v in list(t.items()):
130 if k in src and v in dst:
130 if k in src and v in dst:
131 del t[k]
131 del t[k]
132
132
133 return t
133 return t
134
134
135 def _tracefile(fctx, am, limit=-1):
135 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
136 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
137 manifest am, stopping after the first ancestor lower than limit"""
138
138
139 for f in fctx.ancestors():
139 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
140 if am.get(f.path(), None) == f.filenode():
141 return f
141 return f
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
142 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
143 return None
143 return None
144
144
145 def _dirstatecopies(d, match=None):
145 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
146 ds = d._repo.dirstate
147 c = ds.copies().copy()
147 c = ds.copies().copy()
148 for k in list(c):
148 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
149 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
150 del c[k]
151 return c
151 return c
152
152
153 def _computeforwardmissing(a, b, match=None):
153 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
154 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
155 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
156 files _forwardcopies is about to process.
157 """
157 """
158 ma = a.manifest()
158 ma = a.manifest()
159 mb = b.manifest()
159 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
160 return mb.filesnotin(ma, match=match)
161
161
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a.rev(), b.rev())
    if limit is None:
        # no common ancestor: disable the limit so _tracefile walks freely
        limit = -1
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context keeps per-file linkrev adjustment cheap
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            # util.timer() yields a float, so use '%f' (not '%s') -- the
            # latter breaks under Python 3's bytes formatting
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
218
218
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b, match)
    # copies up to b's parent, chained with those recorded in the dirstate
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chain(a, b, committed, _dirstatecopies(b, match))
232
232
def _backwardrenames(a, b):
    """find renames from b back to a, as a {path@b: path@a} mapping

    Copies are dropped: a file whose source still exists in 'a' was
    copied, not renamed, so it is skipped.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # dict.items() instead of the py2-only iteritems() so this also runs
    # on Python 3; sorted() keeps the arbitrary 1:n pick deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
248
248
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: searching copies from %s to %s\n'
            % (x, y))
    if x == y or not x or not y:
        # identical or empty endpoints: nothing can have been copied
        return {}
    base = y.ancestor(x)
    if base == x:
        # x is an ancestor of y: walk copies forward
        if debug:
            dbg('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if base == y:
        # y is an ancestor of x: walk renames backward
        if debug:
            dbg('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y)
    # unrelated endpoints: back to the common ancestor, then forward
    if debug:
        dbg('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, base),
                  _forwardcopies(base, y, match=match))
271
271
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
274 and c2. This is its own function so extensions can easily wrap this call
274 and c2. This is its own function so extensions can easily wrap this call
275 to see what files mergecopies is about to process.
275 to see what files mergecopies is about to process.
276
276
277 Even though c1 and c2 are not used in this function, they are useful in
277 Even though c1 and c2 are not used in this function, they are useful in
278 other extensions for being able to read the file nodes of the changed files.
278 other extensions for being able to read the file nodes of the changed files.
279
279
280 "baselabel" can be passed to help distinguish the multiple computations
280 "baselabel" can be passed to help distinguish the multiple computations
281 done in the graft case.
281 done in the graft case.
282 """
282 """
283 u1 = sorted(addedinm1 - addedinm2)
283 u1 = sorted(addedinm1 - addedinm2)
284 u2 = sorted(addedinm2 - addedinm1)
284 u2 = sorted(addedinm2 - addedinm1)
285
285
286 header = " unmatched files in %s"
286 header = " unmatched files in %s"
287 if baselabel:
287 if baselabel:
288 header += ' (from %s)' % baselabel
288 header += ' (from %s)' % baselabel
289 if u1:
289 if u1:
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
291 if u2:
291 if u2:
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
293
293
294 return u1, u2
294 return u1, u2
295
295
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute the ancestry set once and cache it on ctx so every file
        # traced from this changeset shares it
        revs = [rev]
        if rev is None:
            # working context: use its parents instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is looked up repeatedly during tracing
    return util.lrucachefunc(makectx)
330
330
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
332 """combine partial copy paths"""
332 """combine partial copy paths"""
333 remainder = {}
333 remainder = {}
334 for f in copyfrom:
334 for f in copyfrom:
335 if f in copyto:
335 if f in copyto:
336 finalcopy[copyto[f]] = copyfrom[f]
336 finalcopy[copyto[f]] = copyfrom[f]
337 del copyto[f]
337 del copyto[f]
338 for f in incompletediverge:
338 for f in incompletediverge:
339 assert f not in diverge
339 assert f not in diverge
340 ic = incompletediverge[f]
340 ic = incompletediverge[f]
341 if ic[0] in copyto:
341 if ic[0] in copyto:
342 diverge[f] = [copyto[ic[0]], ic[1]]
342 diverge[f] = [copyto[ic[0]], ic[1]]
343 else:
343 else:
344 remainder[f] = ic
344 remainder[f] = ic
345 return remainder
345 return remainder
346
346
def mergecopies(repo, c1, c2, base):
    """
    The function calling different copytracing algorithms on the basis of config
    which find moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
419
419
420 def _isfullcopytraceable(repo, c1, base):
420 def _isfullcopytraceable(repo, c1, base):
421 """ Checks that if base, source and destination are all no-public branches,
421 """ Checks that if base, source and destination are all no-public branches,
422 if yes let's use the full copytrace algorithm for increased capabilities
422 if yes let's use the full copytrace algorithm for increased capabilities
423 since it will be fast enough.
423 since it will be fast enough.
424
424
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
426 number of changesets from c1 to base such that if number of changesets are
426 number of changesets from c1 to base such that if number of changesets are
427 more than the limit, full copytracing algorithm won't be used.
427 more than the limit, full copytracing algorithm won't be used.
428 """
428 """
429 if c1.rev() is None:
429 if c1.rev() is None:
430 c1 = c1.p1()
430 c1 = c1.p1()
431 if c1.mutable() and base.mutable():
431 if c1.mutable() and base.mutable():
432 sourcecommitlimit = repo.ui.configint('experimental',
432 sourcecommitlimit = repo.ui.configint('experimental',
433 'copytrace.sourcecommitlimit')
433 'copytrace.sourcecommitlimit')
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
435 return commits < sourcecommitlimit
435 return commits < sourcecommitlimit
436 return False
436 return False
437
437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts: (copy, movewithdir, diverge, renamedelete,
    dirmove).  All five are empty when c1 and c2 have no common ancestor.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug("  searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    # trace copies for the files unmatched on each side, each direction
    # accumulating into its own data dict (diverge data is shared)
    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    # merge the copy data found from both directions
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # materialize items() with list() since entries are deleted mid-iteration
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug("  unmatched files new in both:\n   %s\n"
                      % "\n   ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
671
671
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple of dicts as _fullcopytracing(); only the first
    element (copies) is ever populated by the heuristic path itself.
    """

    # working contexts have no revision number; trace from their parent instead
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (exclusive of base)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the copies whose source still exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname so a
        # missing file can be matched against rename candidates cheaply
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: look it up once instead of once per missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
772
772
773 def _related(f1, f2):
773 def _related(f1, f2):
774 """return True if f1 and f2 filectx have a common ancestor
774 """return True if f1 and f2 filectx have a common ancestor
775
775
776 Walk back to common ancestor to see if the two files originate
776 Walk back to common ancestor to see if the two files originate
777 from the same file. Since workingfilectx's rev() is None it messes
777 from the same file. Since workingfilectx's rev() is None it messes
778 up the integer comparison logic, hence the pre-step check for
778 up the integer comparison logic, hence the pre-step check for
779 None (f1 and f2 can only be workingfilectx's initially).
779 None (f1 and f2 can only be workingfilectx's initially).
780 """
780 """
781
781
782 if f1 == f2:
782 if f1 == f2:
783 return f1 # a match
783 return f1 # a match
784
784
785 g1, g2 = f1.ancestors(), f2.ancestors()
785 g1, g2 = f1.ancestors(), f2.ancestors()
786 try:
786 try:
787 f1r, f2r = f1.linkrev(), f2.linkrev()
787 f1r, f2r = f1.linkrev(), f2.linkrev()
788
788
789 if f1r is None:
789 if f1r is None:
790 f1 = next(g1)
790 f1 = next(g1)
791 if f2r is None:
791 if f2r is None:
792 f2 = next(g2)
792 f2 = next(g2)
793
793
794 while True:
794 while True:
795 f1r, f2r = f1.linkrev(), f2.linkrev()
795 f1r, f2r = f1.linkrev(), f2.linkrev()
796 if f1r > f2r:
796 if f1r > f2r:
797 f1 = next(g1)
797 f1 = next(g1)
798 elif f2r > f1r:
798 elif f2r > f1r:
799 f2 = next(g2)
799 f2 = next(g2)
800 else: # f1 and f2 point to files in the same linkrev
800 else: # f1 and f2 point to files in the same linkrev
801 return f1 == f2 # true if they point to the same file
801 return f1 == f2 # true if they point to the same file
802 except StopIteration:
802 except StopIteration:
803 return False
803 return False
804
804
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Results are accumulated by mutating the sub-dicts of *data* in place
    ('copy', 'fullcopy', 'incomplete', 'diverge', 'incompletediverge');
    nothing is returned.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}  # every path name f has been known by along its history
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            # the rename chain reached the topological CA without resolving:
            # record it as a partial (incomplete) copy or divergence for the
            # caller to combine with the other side's results
            if backwards or remotebase:
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
897
897
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    skipped = {}
    cfg = repo.ui.config('experimental', 'copytrace')
    enabled = stringutil.parsebool(cfg)
    # copytracing is on unless the config is an explicit boolean "off"
    tracing = cfg == 'heuristics' or enabled or enabled is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        skipped = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst not in skipped:
            # copies.pathcopies returns backward renames, so dst might not
            # actually be in the dirstate
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now