##// END OF EJS Templates
copies: use node.wdirrev instead of inventing another constant for it...
Martin von Zweigbergk -
r41267:e3e1b063 default
parent child Browse files
Show More
@@ -1,923 +1,922 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23 from .utils import (
23 from .utils import (
24 stringutil,
24 stringutil,
25 )
25 )
26
26
27 def _findlimit(repo, a, b):
27 def _findlimit(repo, a, b):
28 """
28 """
29 Find the last revision that needs to be checked to ensure that a full
29 Find the last revision that needs to be checked to ensure that a full
30 transitive closure for file copies can be properly calculated.
30 transitive closure for file copies can be properly calculated.
31 Generally, this means finding the earliest revision number that's an
31 Generally, this means finding the earliest revision number that's an
32 ancestor of a or b but not both, except when a or b is a direct descendent
32 ancestor of a or b but not both, except when a or b is a direct descendent
33 of the other, in which case we can return the minimum revnum of a and b.
33 of the other, in which case we can return the minimum revnum of a and b.
34 None if no such revision exists.
34 None if no such revision exists.
35 """
35 """
36
36
37 # basic idea:
37 # basic idea:
38 # - mark a and b with different sides
38 # - mark a and b with different sides
39 # - if a parent's children are all on the same side, the parent is
39 # - if a parent's children are all on the same side, the parent is
40 # on that side, otherwise it is on no side
40 # on that side, otherwise it is on no side
41 # - walk the graph in topological order with the help of a heap;
41 # - walk the graph in topological order with the help of a heap;
42 # - add unseen parents to side map
42 # - add unseen parents to side map
43 # - clear side of any parent that has children on different sides
43 # - clear side of any parent that has children on different sides
44 # - track number of interesting revs that might still be on a side
44 # - track number of interesting revs that might still be on a side
45 # - track the lowest interesting rev seen
45 # - track the lowest interesting rev seen
46 # - quit when interesting revs is zero
46 # - quit when interesting revs is zero
47
47
48 cl = repo.changelog
48 cl = repo.changelog
49 working = len(cl) # pseudo rev for the working directory
50 if a is None:
49 if a is None:
51 a = working
50 a = node.wdirrev
52 if b is None:
51 if b is None:
53 b = working
52 b = node.wdirrev
54
53
55 side = {a: -1, b: 1}
54 side = {a: -1, b: 1}
56 visit = [-a, -b]
55 visit = [-a, -b]
57 heapq.heapify(visit)
56 heapq.heapify(visit)
58 interesting = len(visit)
57 interesting = len(visit)
59 hascommonancestor = False
58 hascommonancestor = False
60 limit = working
59 limit = node.wdirrev
61
60
62 while interesting:
61 while interesting:
63 r = -heapq.heappop(visit)
62 r = -heapq.heappop(visit)
64 if r == working:
63 if r == node.wdirrev:
65 parents = [cl.rev(p) for p in repo.dirstate.parents()]
64 parents = [cl.rev(p) for p in repo.dirstate.parents()]
66 else:
65 else:
67 parents = cl.parentrevs(r)
66 parents = cl.parentrevs(r)
68 for p in parents:
67 for p in parents:
69 if p < 0:
68 if p < 0:
70 continue
69 continue
71 if p not in side:
70 if p not in side:
72 # first time we see p; add it to visit
71 # first time we see p; add it to visit
73 side[p] = side[r]
72 side[p] = side[r]
74 if side[p]:
73 if side[p]:
75 interesting += 1
74 interesting += 1
76 heapq.heappush(visit, -p)
75 heapq.heappush(visit, -p)
77 elif side[p] and side[p] != side[r]:
76 elif side[p] and side[p] != side[r]:
78 # p was interesting but now we know better
77 # p was interesting but now we know better
79 side[p] = 0
78 side[p] = 0
80 interesting -= 1
79 interesting -= 1
81 hascommonancestor = True
80 hascommonancestor = True
82 if side[r]:
81 if side[r]:
83 limit = r # lowest rev visited
82 limit = r # lowest rev visited
84 interesting -= 1
83 interesting -= 1
85
84
86 if not hascommonancestor:
85 if not hascommonancestor:
87 return None
86 return None
88
87
89 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # Consider the following flow (see test-commit-amend.t under issue4405):
90 # 1/ File 'a0' committed
89 # 1/ File 'a0' committed
91 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
92 # 3/ Move back to first commit
91 # 3/ Move back to first commit
93 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
94 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
95 #
94 #
96 # During the amend in step five, we will be in this state:
95 # During the amend in step five, we will be in this state:
97 #
96 #
98 # @ 3 temporary amend commit for a1-amend
97 # @ 3 temporary amend commit for a1-amend
99 # |
98 # |
100 # o 2 a1-amend
99 # o 2 a1-amend
101 # |
100 # |
102 # | o 1 a1
101 # | o 1 a1
103 # |/
102 # |/
104 # o 0 a0
103 # o 0 a0
105 #
104 #
106 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
107 # yet the filelog has the copy information in rev 1 and we will not look
106 # yet the filelog has the copy information in rev 1 and we will not look
108 # back far enough unless we also look at the a and b as candidates.
107 # back far enough unless we also look at the a and b as candidates.
109 # This only occurs when a is a descendent of b or visa-versa.
108 # This only occurs when a is a descendent of b or visa-versa.
110 return min(limit, a, b)
109 return min(limit, a, b)
111
110
112 def _chain(src, dst, a, b):
111 def _chain(src, dst, a, b):
113 """chain two sets of copies a->b"""
112 """chain two sets of copies a->b"""
114 t = a.copy()
113 t = a.copy()
115 for k, v in b.iteritems():
114 for k, v in b.iteritems():
116 if v in t:
115 if v in t:
117 # found a chain
116 # found a chain
118 if t[v] != k:
117 if t[v] != k:
119 # file wasn't renamed back to itself
118 # file wasn't renamed back to itself
120 t[k] = t[v]
119 t[k] = t[v]
121 if v not in dst:
120 if v not in dst:
122 # chain was a rename, not a copy
121 # chain was a rename, not a copy
123 del t[v]
122 del t[v]
124 if v in src:
123 if v in src:
125 # file is a copy of an existing file
124 # file is a copy of an existing file
126 t[k] = v
125 t[k] = v
127
126
128 # remove criss-crossed copies
127 # remove criss-crossed copies
129 for k, v in list(t.items()):
128 for k, v in list(t.items()):
130 if k in src and v in dst:
129 if k in src and v in dst:
131 del t[k]
130 del t[k]
132
131
133 return t
132 return t
134
133
135 def _tracefile(fctx, am, limit=-1):
134 def _tracefile(fctx, am, limit=-1):
136 """return file context that is the ancestor of fctx present in ancestor
135 """return file context that is the ancestor of fctx present in ancestor
137 manifest am, stopping after the first ancestor lower than limit"""
136 manifest am, stopping after the first ancestor lower than limit"""
138
137
139 for f in fctx.ancestors():
138 for f in fctx.ancestors():
140 if am.get(f.path(), None) == f.filenode():
139 if am.get(f.path(), None) == f.filenode():
141 return f
140 return f
142 if limit >= 0 and not f.isintroducedafter(limit):
141 if limit >= 0 and not f.isintroducedafter(limit):
143 return None
142 return None
144
143
145 def _dirstatecopies(d, match=None):
144 def _dirstatecopies(d, match=None):
146 ds = d._repo.dirstate
145 ds = d._repo.dirstate
147 c = ds.copies().copy()
146 c = ds.copies().copy()
148 for k in list(c):
147 for k in list(c):
149 if ds[k] not in 'anm' or (match and not match(k)):
148 if ds[k] not in 'anm' or (match and not match(k)):
150 del c[k]
149 del c[k]
151 return c
150 return c
152
151
153 def _computeforwardmissing(a, b, match=None):
152 def _computeforwardmissing(a, b, match=None):
154 """Computes which files are in b but not a.
153 """Computes which files are in b but not a.
155 This is its own function so extensions can easily wrap this call to see what
154 This is its own function so extensions can easily wrap this call to see what
156 files _forwardcopies is about to process.
155 files _forwardcopies is about to process.
157 """
156 """
158 ma = a.manifest()
157 ma = a.manifest()
159 mb = b.manifest()
158 mb = b.manifest()
160 return mb.filesnotin(ma, match=match)
159 return mb.filesnotin(ma, match=match)
161
160
162 def _committedforwardcopies(a, b, match):
161 def _committedforwardcopies(a, b, match):
163 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
162 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
164 # files might have to be traced back to the fctx parent of the last
163 # files might have to be traced back to the fctx parent of the last
165 # one-side-only changeset, but not further back than that
164 # one-side-only changeset, but not further back than that
166 repo = a._repo
165 repo = a._repo
167 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
166 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
168 dbg = repo.ui.debug
167 dbg = repo.ui.debug
169 if debug:
168 if debug:
170 dbg('debug.copies: looking into rename from %s to %s\n'
169 dbg('debug.copies: looking into rename from %s to %s\n'
171 % (a, b))
170 % (a, b))
172 limit = _findlimit(repo, a.rev(), b.rev())
171 limit = _findlimit(repo, a.rev(), b.rev())
173 if limit is None:
172 if limit is None:
174 limit = -1
173 limit = -1
175 if debug:
174 if debug:
176 dbg('debug.copies: search limit: %d\n' % limit)
175 dbg('debug.copies: search limit: %d\n' % limit)
177 am = a.manifest()
176 am = a.manifest()
178
177
179 # find where new files came from
178 # find where new files came from
180 # we currently don't try to find where old files went, too expensive
179 # we currently don't try to find where old files went, too expensive
181 # this means we can miss a case like 'hg rm b; hg cp a b'
180 # this means we can miss a case like 'hg rm b; hg cp a b'
182 cm = {}
181 cm = {}
183
182
184 # Computing the forward missing is quite expensive on large manifests, since
183 # Computing the forward missing is quite expensive on large manifests, since
185 # it compares the entire manifests. We can optimize it in the common use
184 # it compares the entire manifests. We can optimize it in the common use
186 # case of computing what copies are in a commit versus its parent (like
185 # case of computing what copies are in a commit versus its parent (like
187 # during a rebase or histedit). Note, we exclude merge commits from this
186 # during a rebase or histedit). Note, we exclude merge commits from this
188 # optimization, since the ctx.files() for a merge commit is not correct for
187 # optimization, since the ctx.files() for a merge commit is not correct for
189 # this comparison.
188 # this comparison.
190 forwardmissingmatch = match
189 forwardmissingmatch = match
191 if b.p1() == a and b.p2().node() == node.nullid:
190 if b.p1() == a and b.p2().node() == node.nullid:
192 filesmatcher = scmutil.matchfiles(a._repo, b.files())
191 filesmatcher = scmutil.matchfiles(a._repo, b.files())
193 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
192 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
194 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
193 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
195
194
196 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
195 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
197
196
198 if debug:
197 if debug:
199 dbg('debug.copies: missing file to search: %d\n' % len(missing))
198 dbg('debug.copies: missing file to search: %d\n' % len(missing))
200
199
201 for f in missing:
200 for f in missing:
202 if debug:
201 if debug:
203 dbg('debug.copies: tracing file: %s\n' % f)
202 dbg('debug.copies: tracing file: %s\n' % f)
204 fctx = b[f]
203 fctx = b[f]
205 fctx._ancestrycontext = ancestrycontext
204 fctx._ancestrycontext = ancestrycontext
206
205
207 if debug:
206 if debug:
208 start = util.timer()
207 start = util.timer()
209 ofctx = _tracefile(fctx, am, limit)
208 ofctx = _tracefile(fctx, am, limit)
210 if ofctx:
209 if ofctx:
211 if debug:
210 if debug:
212 dbg('debug.copies: rename of: %s\n' % ofctx._path)
211 dbg('debug.copies: rename of: %s\n' % ofctx._path)
213 cm[f] = ofctx.path()
212 cm[f] = ofctx.path()
214 if debug:
213 if debug:
215 dbg('debug.copies: time: %f seconds\n'
214 dbg('debug.copies: time: %f seconds\n'
216 % (util.timer() - start))
215 % (util.timer() - start))
217 return cm
216 return cm
218
217
219 def _forwardcopies(a, b, match=None):
218 def _forwardcopies(a, b, match=None):
220 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
219 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
221
220
222 match = a.repo().narrowmatch(match)
221 match = a.repo().narrowmatch(match)
223 # check for working copy
222 # check for working copy
224 if b.rev() is None:
223 if b.rev() is None:
225 if a == b.p1():
224 if a == b.p1():
226 # short-circuit to avoid issues with merge states
225 # short-circuit to avoid issues with merge states
227 return _dirstatecopies(b, match)
226 return _dirstatecopies(b, match)
228
227
229 cm = _committedforwardcopies(a, b.p1(), match)
228 cm = _committedforwardcopies(a, b.p1(), match)
230 # combine copies from dirstate if necessary
229 # combine copies from dirstate if necessary
231 return _chain(a, b, cm, _dirstatecopies(b, match))
230 return _chain(a, b, cm, _dirstatecopies(b, match))
232 return _committedforwardcopies(a, b, match)
231 return _committedforwardcopies(a, b, match)
233
232
234 def _backwardrenames(a, b):
233 def _backwardrenames(a, b):
235 if a._repo.ui.config('experimental', 'copytrace') == 'off':
234 if a._repo.ui.config('experimental', 'copytrace') == 'off':
236 return {}
235 return {}
237
236
238 # Even though we're not taking copies into account, 1:n rename situations
237 # Even though we're not taking copies into account, 1:n rename situations
239 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
238 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
240 # arbitrarily pick one of the renames.
239 # arbitrarily pick one of the renames.
241 f = _forwardcopies(b, a)
240 f = _forwardcopies(b, a)
242 r = {}
241 r = {}
243 for k, v in sorted(f.iteritems()):
242 for k, v in sorted(f.iteritems()):
244 # remove copies
243 # remove copies
245 if v in a:
244 if v in a:
246 continue
245 continue
247 r[v] = k
246 r[v] = k
248 return r
247 return r
249
248
250 def pathcopies(x, y, match=None):
249 def pathcopies(x, y, match=None):
251 """find {dst@y: src@x} copy mapping for directed compare"""
250 """find {dst@y: src@x} copy mapping for directed compare"""
252 repo = x._repo
251 repo = x._repo
253 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
252 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
254 if debug:
253 if debug:
255 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
254 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
256 % (x, y))
255 % (x, y))
257 if x == y or not x or not y:
256 if x == y or not x or not y:
258 return {}
257 return {}
259 a = y.ancestor(x)
258 a = y.ancestor(x)
260 if a == x:
259 if a == x:
261 if debug:
260 if debug:
262 repo.ui.debug('debug.copies: search mode: forward\n')
261 repo.ui.debug('debug.copies: search mode: forward\n')
263 return _forwardcopies(x, y, match=match)
262 return _forwardcopies(x, y, match=match)
264 if a == y:
263 if a == y:
265 if debug:
264 if debug:
266 repo.ui.debug('debug.copies: search mode: backward\n')
265 repo.ui.debug('debug.copies: search mode: backward\n')
267 return _backwardrenames(x, y)
266 return _backwardrenames(x, y)
268 if debug:
267 if debug:
269 repo.ui.debug('debug.copies: search mode: combined\n')
268 repo.ui.debug('debug.copies: search mode: combined\n')
270 return _chain(x, y, _backwardrenames(x, a),
269 return _chain(x, y, _backwardrenames(x, a),
271 _forwardcopies(a, y, match=match))
270 _forwardcopies(a, y, match=match))
272
271
273 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
272 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
274 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
273 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
275 and c2. This is its own function so extensions can easily wrap this call
274 and c2. This is its own function so extensions can easily wrap this call
276 to see what files mergecopies is about to process.
275 to see what files mergecopies is about to process.
277
276
278 Even though c1 and c2 are not used in this function, they are useful in
277 Even though c1 and c2 are not used in this function, they are useful in
279 other extensions for being able to read the file nodes of the changed files.
278 other extensions for being able to read the file nodes of the changed files.
280
279
281 "baselabel" can be passed to help distinguish the multiple computations
280 "baselabel" can be passed to help distinguish the multiple computations
282 done in the graft case.
281 done in the graft case.
283 """
282 """
284 u1 = sorted(addedinm1 - addedinm2)
283 u1 = sorted(addedinm1 - addedinm2)
285 u2 = sorted(addedinm2 - addedinm1)
284 u2 = sorted(addedinm2 - addedinm1)
286
285
287 header = " unmatched files in %s"
286 header = " unmatched files in %s"
288 if baselabel:
287 if baselabel:
289 header += ' (from %s)' % baselabel
288 header += ' (from %s)' % baselabel
290 if u1:
289 if u1:
291 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
290 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
292 if u2:
291 if u2:
293 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
292 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
294
293
295 return u1, u2
294 return u1, u2
296
295
297 def _makegetfctx(ctx):
296 def _makegetfctx(ctx):
298 """return a 'getfctx' function suitable for _checkcopies usage
297 """return a 'getfctx' function suitable for _checkcopies usage
299
298
300 We have to re-setup the function building 'filectx' for each
299 We have to re-setup the function building 'filectx' for each
301 '_checkcopies' to ensure the linkrev adjustment is properly setup for
300 '_checkcopies' to ensure the linkrev adjustment is properly setup for
302 each. Linkrev adjustment is important to avoid bug in rename
301 each. Linkrev adjustment is important to avoid bug in rename
303 detection. Moreover, having a proper '_ancestrycontext' setup ensures
302 detection. Moreover, having a proper '_ancestrycontext' setup ensures
304 the performance impact of this adjustment is kept limited. Without it,
303 the performance impact of this adjustment is kept limited. Without it,
305 each file could do a full dag traversal making the time complexity of
304 each file could do a full dag traversal making the time complexity of
306 the operation explode (see issue4537).
305 the operation explode (see issue4537).
307
306
308 This function exists here mostly to limit the impact on stable. Feel
307 This function exists here mostly to limit the impact on stable. Feel
309 free to refactor on default.
308 free to refactor on default.
310 """
309 """
311 rev = ctx.rev()
310 rev = ctx.rev()
312 repo = ctx._repo
311 repo = ctx._repo
313 ac = getattr(ctx, '_ancestrycontext', None)
312 ac = getattr(ctx, '_ancestrycontext', None)
314 if ac is None:
313 if ac is None:
315 revs = [rev]
314 revs = [rev]
316 if rev is None:
315 if rev is None:
317 revs = [p.rev() for p in ctx.parents()]
316 revs = [p.rev() for p in ctx.parents()]
318 ac = repo.changelog.ancestors(revs, inclusive=True)
317 ac = repo.changelog.ancestors(revs, inclusive=True)
319 ctx._ancestrycontext = ac
318 ctx._ancestrycontext = ac
320 def makectx(f, n):
319 def makectx(f, n):
321 if n in node.wdirfilenodeids: # in a working context?
320 if n in node.wdirfilenodeids: # in a working context?
322 if ctx.rev() is None:
321 if ctx.rev() is None:
323 return ctx.filectx(f)
322 return ctx.filectx(f)
324 return repo[None][f]
323 return repo[None][f]
325 fctx = repo.filectx(f, fileid=n)
324 fctx = repo.filectx(f, fileid=n)
326 # setup only needed for filectx not create from a changectx
325 # setup only needed for filectx not create from a changectx
327 fctx._ancestrycontext = ac
326 fctx._ancestrycontext = ac
328 fctx._descendantrev = rev
327 fctx._descendantrev = rev
329 return fctx
328 return fctx
330 return util.lrucachefunc(makectx)
329 return util.lrucachefunc(makectx)
331
330
332 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
331 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
333 """combine partial copy paths"""
332 """combine partial copy paths"""
334 remainder = {}
333 remainder = {}
335 for f in copyfrom:
334 for f in copyfrom:
336 if f in copyto:
335 if f in copyto:
337 finalcopy[copyto[f]] = copyfrom[f]
336 finalcopy[copyto[f]] = copyfrom[f]
338 del copyto[f]
337 del copyto[f]
339 for f in incompletediverge:
338 for f in incompletediverge:
340 assert f not in diverge
339 assert f not in diverge
341 ic = incompletediverge[f]
340 ic = incompletediverge[f]
342 if ic[0] in copyto:
341 if ic[0] in copyto:
343 diverge[f] = [copyto[ic[0]], ic[1]]
342 diverge[f] = [copyto[ic[0]], ic[1]]
344 else:
343 else:
345 remainder[f] = ic
344 remainder[f] = ic
346 return remainder
345 return remainder
347
346
348 def mergecopies(repo, c1, c2, base):
347 def mergecopies(repo, c1, c2, base):
349 """
348 """
350 The function calling different copytracing algorithms on the basis of config
349 The function calling different copytracing algorithms on the basis of config
351 which find moves and copies between context c1 and c2 that are relevant for
350 which find moves and copies between context c1 and c2 that are relevant for
352 merging. 'base' will be used as the merge base.
351 merging. 'base' will be used as the merge base.
353
352
354 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
353 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
355 files that were moved/ copied in one merge parent and modified in another.
354 files that were moved/ copied in one merge parent and modified in another.
356 For example:
355 For example:
357
356
358 o ---> 4 another commit
357 o ---> 4 another commit
359 |
358 |
360 | o ---> 3 commit that modifies a.txt
359 | o ---> 3 commit that modifies a.txt
361 | /
360 | /
362 o / ---> 2 commit that moves a.txt to b.txt
361 o / ---> 2 commit that moves a.txt to b.txt
363 |/
362 |/
364 o ---> 1 merge base
363 o ---> 1 merge base
365
364
366 If we try to rebase revision 3 on revision 4, since there is no a.txt in
365 If we try to rebase revision 3 on revision 4, since there is no a.txt in
367 revision 4, and if user have copytrace disabled, we prints the following
366 revision 4, and if user have copytrace disabled, we prints the following
368 message:
367 message:
369
368
370 ```other changed <file> which local deleted```
369 ```other changed <file> which local deleted```
371
370
372 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
371 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
373 "dirmove".
372 "dirmove".
374
373
375 "copy" is a mapping from destination name -> source name,
374 "copy" is a mapping from destination name -> source name,
376 where source is in c1 and destination is in c2 or vice-versa.
375 where source is in c1 and destination is in c2 or vice-versa.
377
376
378 "movewithdir" is a mapping from source name -> destination name,
377 "movewithdir" is a mapping from source name -> destination name,
379 where the file at source present in one context but not the other
378 where the file at source present in one context but not the other
380 needs to be moved to destination by the merge process, because the
379 needs to be moved to destination by the merge process, because the
381 other context moved the directory it is in.
380 other context moved the directory it is in.
382
381
383 "diverge" is a mapping of source name -> list of destination names
382 "diverge" is a mapping of source name -> list of destination names
384 for divergent renames.
383 for divergent renames.
385
384
386 "renamedelete" is a mapping of source name -> list of destination
385 "renamedelete" is a mapping of source name -> list of destination
387 names for files deleted in c1 that were renamed in c2 or vice-versa.
386 names for files deleted in c1 that were renamed in c2 or vice-versa.
388
387
389 "dirmove" is a mapping of detected source dir -> destination dir renames.
388 "dirmove" is a mapping of detected source dir -> destination dir renames.
390 This is needed for handling changes to new files previously grafted into
389 This is needed for handling changes to new files previously grafted into
391 renamed directories.
390 renamed directories.
392 """
391 """
393 # avoid silly behavior for update from empty dir
392 # avoid silly behavior for update from empty dir
394 if not c1 or not c2 or c1 == c2:
393 if not c1 or not c2 or c1 == c2:
395 return {}, {}, {}, {}, {}
394 return {}, {}, {}, {}, {}
396
395
397 # avoid silly behavior for parent -> working dir
396 # avoid silly behavior for parent -> working dir
398 if c2.node() is None and c1.node() == repo.dirstate.p1():
397 if c2.node() is None and c1.node() == repo.dirstate.p1():
399 return repo.dirstate.copies(), {}, {}, {}, {}
398 return repo.dirstate.copies(), {}, {}, {}, {}
400
399
401 copytracing = repo.ui.config('experimental', 'copytrace')
400 copytracing = repo.ui.config('experimental', 'copytrace')
402 boolctrace = stringutil.parsebool(copytracing)
401 boolctrace = stringutil.parsebool(copytracing)
403
402
404 # Copy trace disabling is explicitly below the node == p1 logic above
403 # Copy trace disabling is explicitly below the node == p1 logic above
405 # because the logic above is required for a simple copy to be kept across a
404 # because the logic above is required for a simple copy to be kept across a
406 # rebase.
405 # rebase.
407 if copytracing == 'heuristics':
406 if copytracing == 'heuristics':
408 # Do full copytracing if only non-public revisions are involved as
407 # Do full copytracing if only non-public revisions are involved as
409 # that will be fast enough and will also cover the copies which could
408 # that will be fast enough and will also cover the copies which could
410 # be missed by heuristics
409 # be missed by heuristics
411 if _isfullcopytraceable(repo, c1, base):
410 if _isfullcopytraceable(repo, c1, base):
412 return _fullcopytracing(repo, c1, c2, base)
411 return _fullcopytracing(repo, c1, c2, base)
413 return _heuristicscopytracing(repo, c1, c2, base)
412 return _heuristicscopytracing(repo, c1, c2, base)
414 elif boolctrace is False:
413 elif boolctrace is False:
415 # stringutil.parsebool() returns None when it is unable to parse the
414 # stringutil.parsebool() returns None when it is unable to parse the
416 # value, so we should rely on making sure copytracing is on such cases
415 # value, so we should rely on making sure copytracing is on such cases
417 return {}, {}, {}, {}, {}
416 return {}, {}, {}, {}, {}
418 else:
417 else:
419 return _fullcopytracing(repo, c1, c2, base)
418 return _fullcopytracing(repo, c1, c2, base)
420
419
421 def _isfullcopytraceable(repo, c1, base):
420 def _isfullcopytraceable(repo, c1, base):
422 """ Checks that if base, source and destination are all no-public branches,
421 """ Checks that if base, source and destination are all no-public branches,
423 if yes let's use the full copytrace algorithm for increased capabilities
422 if yes let's use the full copytrace algorithm for increased capabilities
424 since it will be fast enough.
423 since it will be fast enough.
425
424
426 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
425 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
427 number of changesets from c1 to base such that if number of changesets are
426 number of changesets from c1 to base such that if number of changesets are
428 more than the limit, full copytracing algorithm won't be used.
427 more than the limit, full copytracing algorithm won't be used.
429 """
428 """
430 if c1.rev() is None:
429 if c1.rev() is None:
431 c1 = c1.p1()
430 c1 = c1.p1()
432 if c1.mutable() and base.mutable():
431 if c1.mutable() and base.mutable():
433 sourcecommitlimit = repo.ui.configint('experimental',
432 sourcecommitlimit = repo.ui.configint('experimental',
434 'copytrace.sourcecommitlimit')
433 'copytrace.sourcecommitlimit')
435 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
434 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
436 return commits < sourcecommitlimit
435 return commits < sourcecommitlimit
437 return False
436 return False
438
437
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts: (copy, movewithdir, diverge, renamedelete,
    dirmove) -- copy and movewithdir map destination filenames to source
    filenames, diverge and renamedelete map a source filename to the list of
    destinations it diverged into, and dirmove maps moved source directories
    (with trailing '/') to their destination directories.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    # NOTE: data1/data2 share the 'diverge' and 'incompletediverge' dicts
    # above, so _checkcopies calls on either side accumulate into them.
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a copy (list(...)) because entries are deleted inline
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize keys/values to directory prefixes with trailing slash
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                        break

    return copy, movewithdir, diverge, renamedelete, dirmove
673
672
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    element (the copy dict) is ever populated by the heuristics themselves.
    """

    # work from the first real (committed) parents when given working contexts
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        # index the files added on the c1 side by basename and by dirname,
        # so each missing file only inspects plausible rename candidates
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here: the value is loop-invariant, so there is no point
        # re-querying the config for every missing file)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
774
773
775 def _related(f1, f2):
774 def _related(f1, f2):
776 """return True if f1 and f2 filectx have a common ancestor
775 """return True if f1 and f2 filectx have a common ancestor
777
776
778 Walk back to common ancestor to see if the two files originate
777 Walk back to common ancestor to see if the two files originate
779 from the same file. Since workingfilectx's rev() is None it messes
778 from the same file. Since workingfilectx's rev() is None it messes
780 up the integer comparison logic, hence the pre-step check for
779 up the integer comparison logic, hence the pre-step check for
781 None (f1 and f2 can only be workingfilectx's initially).
780 None (f1 and f2 can only be workingfilectx's initially).
782 """
781 """
783
782
784 if f1 == f2:
783 if f1 == f2:
785 return f1 # a match
784 return f1 # a match
786
785
787 g1, g2 = f1.ancestors(), f2.ancestors()
786 g1, g2 = f1.ancestors(), f2.ancestors()
788 try:
787 try:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
788 f1r, f2r = f1.linkrev(), f2.linkrev()
790
789
791 if f1r is None:
790 if f1r is None:
792 f1 = next(g1)
791 f1 = next(g1)
793 if f2r is None:
792 if f2r is None:
794 f2 = next(g2)
793 f2 = next(g2)
795
794
796 while True:
795 while True:
797 f1r, f2r = f1.linkrev(), f2.linkrev()
796 f1r, f2r = f1.linkrev(), f2.linkrev()
798 if f1r > f2r:
797 if f1r > f2r:
799 f1 = next(g1)
798 f1 = next(g1)
800 elif f2r > f1r:
799 elif f2r > f1r:
801 f2 = next(g2)
800 f2 = next(g2)
802 else: # f1 and f2 point to files in the same linkrev
801 else: # f1 and f2 point to files in the same linkrev
803 return f1 == f2 # true if they point to the same file
802 return f1 == f2 # true if they point to the same file
804 except StopIteration:
803 except StopIteration:
805 return False
804 return False
806
805
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Returns None; results are accumulated by mutating the 'copy',
    'fullcopy', 'incomplete', 'diverge' and 'incompletediverge'
    sub-dictionaries of `data` in place.

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    # seen holds every path name f was known under while walking ancestors
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    # ancestor walk exhausted (or limit hit) without a full match; record
    # partial results when the last name seen exists in the topological CA
    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return
899
898
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    cfg = repo.ui.config('experimental', 'copytrace')
    parsed = stringutil.parsebool(cfg)
    # copytracing is active unless the config is an explicit "off"
    tracing = cfg == 'heuristics' or parsed or parsed is None
    if skiprev is not None and tracing:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst not in exclude:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now