copies: move comment about implementation of mergecopies() to end...
Martin von Zweigbergk
r42287:967c098e default
@@ -1,1011 +1,1012 b''
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    util,
)
from .utils import (
    stringutil,
)

def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendant
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)
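
# For example, in a linear history 0 <- 1 <- 2, _findlimit(repo, repo[2],
# repo[1]) marks rev 2 and rev 1 with different sides; visiting rev 2 clears
# the side of its parent rev 1 (reachable from both) and records limit = 2,
# so the function returns min(2, 2, 1) == 1, the direct-descendant case
# described in the docstring above.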

def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    for k, v in list(t.items()):
        # remove criss-crossed copies
        if k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

    return t
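
# For example, chaining a rename a->b with a later rename b->c, where the
# 'src' context contains only 'a' and the 'dst' context contains only 'c':
#
#     _chain(src, dst, {'b': 'a'}, {'c': 'b'}) == {'c': 'a'}
#
# The intermediate entry for 'b' is dropped because 'b' no longer exists in
# dst (the second step was a rename, not a copy).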

def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None
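
# For example, if fctx is the filectx of 'b' in some descendant and 'b' was
# renamed from 'a' after the revision captured by the ancestor manifest am,
# _tracefile() walks fctx.ancestors() until it reaches the 'a' filectx whose
# filenode matches am.get('a'), and returns it; if the walk first drops below
# 'limit' without a match, it gives up and returns None.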

def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c
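
# The 'anm' check keeps copy information only for files whose dirstate state
# is added ('a'), normal ('n') or merged ('m'); entries for files in other
# states, such as removed ('r'), are dropped, as are paths rejected by the
# optional matcher.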

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)
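
# For example, if a's manifest is {'a.txt'} and b's manifest is
# {'a.txt', 'b.txt'}, _computeforwardmissing(a, b) returns {'b.txt'}: only
# files that appear between a and b need copy tracing.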

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    return (repo.ui.config('experimental', 'copies.read-from') ==
            'compatibility')
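
# Changeset-centric copy algorithms are enabled with:
#
#     [experimental]
#     copies.read-from = compatibility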

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing file to search: %d\n' % len(missing))

    for f in missing:
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

def _changesetforwardcopies(a, b, match):
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is a merge, whether or not we're tracing from both parents.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            return copies
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    assert False

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)
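
# For example, if 'hg cp a.txt b.txt' was committed between ancestor a and
# descendant b, _forwardcopies(a, b) returns {'b.txt': 'a.txt'}; for a dirty
# working copy, committed copies and dirstate copies are chained together
# above.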

def _backwardrenames(a, b, match):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
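
# For example, if a renamed 'a.txt' to 'c.txt' relative to its ancestor b,
# _forwardcopies(b, a) yields {'c.txt': 'a.txt'} and _backwardrenames()
# reverses that to {'a.txt': 'c.txt'}; had it been a copy ('a.txt' still
# present in a), the 'v in a' check above would have skipped the entry.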

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))

def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    return u1, u2
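
# For example, with addedinm1 = {'x', 'y'} and addedinm2 = {'y', 'z'},
# _computenonoverlap() returns (['x'], ['z']); 'y' was added on both sides
# and is handled separately as "bothnew" in _fullcopytracing() below.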

def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bugs in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not created from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)

def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
    """combine partial copy paths"""
    remainder = {}
    for f in copyfrom:
        if f in copyto:
            finalcopy[copyto[f]] = copyfrom[f]
            del copyto[f]
    for f in incompletediverge:
        assert f not in diverge
        ic = incompletediverge[f]
        if ic[0] in copyto:
            diverge[f] = [copyto[ic[0]], ic[1]]
        else:
            remainder[f] = ic
    return remainder
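
# For example, if one side of a graft recorded the partial copy
# {'f': 'old'} (copyfrom) and the other side recorded {'f': 'new'} (copyto),
# _combinecopies() joins the halves into finalcopy['new'] = 'old', i.e. a
# complete 'old' -> 'new' copy across the rotated base.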

def mergecopies(repo, c1, c2, base):
    """
-    The function calling different copytracing algorithms on the basis of config
-    which find moves and copies between context c1 and c2 that are relevant for
+    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
    files that were moved/copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
+
+    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    boolctrace = stringutil.parsebool(copytracing)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    elif boolctrace is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on in such
        # cases
        return {}, {}, {}, {}, {}
    else:
        return _fullcopytracing(repo, c1, c2, base)
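
# A sketch of how callers consume the result (assuming a repo where one side
# ran 'hg mv a.txt b.txt' and the other side modified a.txt):
#
#     copy, movewithdir, diverge, renamedelete, dirmove = \
#         mergecopies(repo, c1, c2, base)
#     # copy == {'b.txt': 'a.txt'}: merge applies a.txt's changes to b.txt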

def _isfullcopytraceable(repo, c1, base):
    """ Checks if base, source and destination are all non-public branches;
    if yes, use the full copytrace algorithm for increased capabilities,
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit on
    the number of changesets from c1 to base; if the number of changesets is
    more than the limit, the full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False
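
# The threshold is configurable, e.g.:
#
#     [experimental]
#     copytrace.sourcecommitlimit = 50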

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm, which finds all the new files that were
    added from merge base up to the top commit and, for each file, checks if
    it was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden. We still need to know a real common ancestor in this case. We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.isancestorof(wc) is False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not base.isancestorof(_c1)
    dirtyc2 = not base.isancestorof(_c2)
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1, c2)
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2,
                                      m1.filesnotin(mta, repo.narrowmatch()),
                                      m2.filesnotin(mta, repo.narrowmatch()),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    if dirtyc2:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1 and dirtyc2:
        remainder = _combinecopies(both2['incomplete'], both1['incomplete'],
                                   copy, bothdiverge, bothincompletediverge)
        remainder1 = _combinecopies(both1['incomplete'], both2['incomplete'],
                                    copy, bothdiverge, bothincompletediverge)
        remainder.update(remainder1)
    elif dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
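
# For example, if every file under 'src/' was moved to 'lib/' on one side,
# dirmove ends up as {'src/': 'lib/'}, and a file 'src/new.c' introduced on
# the other side is scheduled to follow the directory:
# movewithdir['src/new.c'] == 'lib/new.c'.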
761
762
762 def _heuristicscopytracing(repo, c1, c2, base):
763 def _heuristicscopytracing(repo, c1, c2, base):
763 """ Fast copytracing using filename heuristics
764 """ Fast copytracing using filename heuristics
764
765
765 Assumes that moves or renames are of following two types:
766 Assumes that moves or renames are of following two types:
766
767
767 1) Inside a directory only (same directory name but different filenames)
768 1) Inside a directory only (same directory name but different filenames)
768 2) Move from one directory to another
769 2) Move from one directory to another
769 (same filenames but different directory names)
770 (same filenames but different directory names)
770
771
771 Works only when there are no merge commits in the "source branch".
772 Works only when there are no merge commits in the "source branch".
772 Source branch is commits from base up to c2 not including base.
773 Source branch is commits from base up to c2 not including base.
773
774
774 If merge is involved it fallbacks to _fullcopytracing().
775 If merge is involved it fallbacks to _fullcopytracing().
775
776
776 Can be used by setting the following config:
777 Can be used by setting the following config:
777
778
778 [experimental]
779 [experimental]
779 copytrace = heuristics
780 copytrace = heuristics
780
781
781 In some cases the copy/move candidates found by heuristics can be very large
782 In some cases the copy/move candidates found by heuristics can be very large
782 in number and that will make the algorithm slow. The number of possible
783 in number and that will make the algorithm slow. The number of possible
783 candidates to check can be limited by using the config
784 candidates to check can be limited by using the config
784 `experimental.copytrace.movecandidateslimit` which defaults to 100.
785 `experimental.copytrace.movecandidateslimit` which defaults to 100.
785 """
786 """
786
787
787 if c1.rev() is None:
788 if c1.rev() is None:
788 c1 = c1.p1()
789 c1 = c1.p1()
789 if c2.rev() is None:
790 if c2.rev() is None:
790 c2 = c2.p1()
791 c2 = c2.p1()
791
792
792 copies = {}
793 copies = {}
793
794
794 changedfiles = set()
795 changedfiles = set()
795 m1 = c1.manifest()
796 m1 = c1.manifest()
796 if not repo.revs('%d::%d', base.rev(), c2.rev()):
797 if not repo.revs('%d::%d', base.rev(), c2.rev()):
797 # If base is not in c2 branch, we switch to fullcopytracing
798 # If base is not in c2 branch, we switch to fullcopytracing
798 repo.ui.debug("switching to full copytracing as base is not "
799 repo.ui.debug("switching to full copytracing as base is not "
799 "an ancestor of c2\n")
800 "an ancestor of c2\n")
800 return _fullcopytracing(repo, c1, c2, base)
801 return _fullcopytracing(repo, c1, c2, base)
801
802
802 ctx = c2
803 ctx = c2
803 while ctx != base:
804 while ctx != base:
804 if len(ctx.parents()) == 2:
805 if len(ctx.parents()) == 2:
805 # To keep things simple let's not handle merges
806 # To keep things simple let's not handle merges
806 repo.ui.debug("switching to full copytracing because of merges\n")
807 repo.ui.debug("switching to full copytracing because of merges\n")
807 return _fullcopytracing(repo, c1, c2, base)
808 return _fullcopytracing(repo, c1, c2, base)
808 changedfiles.update(ctx.files())
809 changedfiles.update(ctx.files())
809 ctx = ctx.p1()
810 ctx = ctx.p1()
810
811
811 cp = _forwardcopies(base, c2)
812 cp = _forwardcopies(base, c2)
812 for dst, src in cp.iteritems():
813 for dst, src in cp.iteritems():
813 if src in m1:
814 if src in m1:
814 copies[dst] = src
815 copies[dst] = src

    # A file is missing if it isn't present in the destination, but is
    # present in the base and present in the source.
    # Presence in the base is important to exclude added files; presence in
    # the source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]
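    # Hypothetical example (filenames are illustrative only): if base
    # contains 'dir/a.py', c1 renamed it to 'dir/b.py' (so 'dir/a.py' is
    # not in m1), and c2 modified 'dir/a.py' in place, then 'dir/a.py' is
    # "missing" and becomes a candidate for move detection below.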

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)
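        # Sketch of the two indexes built above for files new in c1 relative
        # to base (hypothetical path): if c1 added 'newdir/a.py', then
        #   basenametofilename['a.py']  -> [..., 'newdir/a.py']
        #   dirnametofilename['newdir'] -> [..., 'newdir/a.py']
        # so a missing file is matched against files sharing its basename
        # (moved across directories) or its directory (renamed within the
        # same directory).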

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # there can be many candidates, which can make the heuristics
            # slow; a config value limits the number of move candidates
            # to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are multiple related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f
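    # The heuristics only fill in the 'copy' dict; the four empty dicts
    # below stand in for the other results that _fullcopytracing() would
    # compute (see mergecopies() for what each slot means).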
    return copies, {}, {}, {}, {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to the common ancestor to see if the two files originate
    from the same file. Since a workingfilectx's rev() is None, it messes
    up the integer comparison logic, hence the pre-step check for None
    (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionaries to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited; there is no easy way to make
    this algorithm stop in a guaranteed way once it "goes behind a certain
    revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # 'backwards' might be True if this call is about finding backward
    # renames; this happens in the case of grafts because the DAG is then
    # rotated. If the file exists in both the base and the source, we are
    # not looking for a rename on the source side, but on the part of the
    # DAG that is traversed backwards.
    #
    # In the case there are both backward and forward renames (before and
    # after the base) this is more complicated, as we must detect a
    # divergence. We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
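    # Hedged intuition for the graft case: base is then the grafted
    # commit's parent and typically a descendant of tca rather than an
    # ancestor of both sides, so the segment between tca and base is
    # traversed against the DAG's direction; renames found there are the
    # "backward" ones recorded via data['fullcopy'][of] = f below.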
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        of = oc.path()
        if of in seen:
            # check limit late - grab the last rename before the limit
            if oc.linkrev() < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

    if of in mta:
        if backwards or remotebase:
            data['incomplete'][of] = f
        else:
            for sf in seen:
                if sf in mb:
                    if tca == base:
                        data['diverge'].setdefault(sf, []).append(f)
                    else:
                        data['incompletediverge'][sf] = [of, f]
                    return

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function
        # because the line below is O(size of the repo) during a rebase,
        # while the rest of the function is much faster (and is required
        # for carrying copy metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)