copies: make "limit" argument to _tracefile() mandatory...
Martin von Zweigbergk
r42427:c7422691 default
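The functional change is small: _tracefile() loses the default value of its "limit" parameter, so every caller must now pass one explicitly (the only caller in this file, _committedforwardcopies(), already does). A minimal sketch of the effect, not part of the patch itself:

    # before: a call that forgot "limit" silently fell back to node.nullrev,
    # which disabled the early-stop check in _tracefile() entirely
    def _tracefile(fctx, am, limit=node.nullrev):
        ...

    # after: the argument is required, so a forgotten "limit" fails loudly
    def _tracefile(fctx, am, limit):
        ...

    _tracefile(fctx, am)          # TypeError: 'limit' is now required
    _tracefile(fctx, am, limit)   # what _committedforwardcopies() passes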
@@ -1,805 +1,805 @@
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendant
31 ancestor of a or b but not both, except when a or b is a direct descendant
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendant of b or vice versa.
107 # This only occurs when a is a descendant of b or vice versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
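A toy trace of the side-marking walk above on the four-revision graph from the comment (a standalone sketch, not Mercurial code; revision 1 is never visited because it is not an ancestor of either endpoint):

    parents = {3: (2,), 2: (0,), 1: (0,), 0: ()}
    # a = 3, b = 0  ->  side = {3: -1, 0: 1}; revs are pushed negated so the
    #                   heap always pops the highest remaining rev first
    # pop 3: parent 2 is unseen and inherits side -1; limit = 3
    # pop 2: parent 0 already carries the opposite side, so its side is
    #        cleared; limit = 2 and 'interesting' reaches 0, ending the loop
    # the raw limit is 2, but min(limit, a, b) = 0 -- that final min() is
    # what lets the filelog copy data recorded in revision 1 be reached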
110 def _chain(src, dst, a, b):
110 def _chain(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b'"""
111 """chain two sets of copies 'a' and 'b'"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126
126
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 # overwritten later, and case 3 will be removed later.
129 # overwritten later, and case 3 will be removed later.
130 t = a.copy()
130 t = a.copy()
131 for k, v in b.iteritems():
131 for k, v in b.iteritems():
132 if v in t:
132 if v in t:
133 # found a chain, i.e. cases 3 & 4.
133 # found a chain, i.e. cases 3 & 4.
134 if t[v] != k:
134 if t[v] != k:
135 # file wasn't renamed back to itself (i.e. case 4, not 3)
135 # file wasn't renamed back to itself (i.e. case 4, not 3)
136 t[k] = t[v]
136 t[k] = t[v]
137 else:
137 else:
138 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
138 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
139 t[k] = v
139 t[k] = v
140
140
141 for k, v in list(t.items()):
141 for k, v in list(t.items()):
142 # remove copies from files that didn't exist, i.e. case 5
142 # remove copies from files that didn't exist, i.e. case 5
143 if v not in src:
143 if v not in src:
144 del t[k]
144 del t[k]
145 # remove criss-crossed copies, i.e. case 3
145 # remove criss-crossed copies, i.e. case 3
146 elif k in src and v in dst:
146 elif k in src and v in dst:
147 del t[k]
147 del t[k]
148 # remove copies to files that were then removed, i.e. case 1
148 # remove copies to files that were then removed, i.e. case 1
149 # and file 'y' in cases 3 & 4 (in case of rename)
149 # and file 'y' in cases 3 & 4 (in case of rename)
150 elif k not in dst:
150 elif k not in dst:
151 del t[k]
151 del t[k]
152
152
153 return t
153 return t
154
154
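A worked example of the case table above using plain containers (illustrative values; as in the function, copy dicts map destination to source):

    src = {'x'}           # files present in 'src'
    dst = {'z'}           # files present in 'dst'
    a = {'y': 'x'}        # src -> mid: 'x' renamed to 'y'
    b = {'z': 'y'}        # mid -> dst: 'y' renamed to 'z'
    # _chain(src, dst, a, b) seeds t from a, then sees that b's source 'y'
    # is already a key of t, so it records t['z'] = t['y'] = 'x' (case 4).
    # The cleanup pass drops the intermediate 'y' entry because 'y' is not
    # in dst, leaving {'z': 'x'}.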
155 - def _tracefile(fctx, am, limit=node.nullrev):
155 + def _tracefile(fctx, am, limit):
156 """return file context that is the ancestor of fctx present in ancestor
156 """return file context that is the ancestor of fctx present in ancestor
157 manifest am, stopping after the first ancestor lower than limit"""
157 manifest am, stopping after the first ancestor lower than limit"""
158
158
159 for f in fctx.ancestors():
159 for f in fctx.ancestors():
160 if am.get(f.path(), None) == f.filenode():
160 if am.get(f.path(), None) == f.filenode():
161 return f
161 return f
162 -         if limit >= 0 and not f.isintroducedafter(limit):
162 +         if not f.isintroducedafter(limit):
163 return None
163 return None
164
164
165 def _dirstatecopies(repo, match=None):
165 def _dirstatecopies(repo, match=None):
166 ds = repo.dirstate
166 ds = repo.dirstate
167 c = ds.copies().copy()
167 c = ds.copies().copy()
168 for k in list(c):
168 for k in list(c):
169 if ds[k] not in 'anm' or (match and not match(k)):
169 if ds[k] not in 'anm' or (match and not match(k)):
170 del c[k]
170 del c[k]
171 return c
171 return c
172
172
173 def _computeforwardmissing(a, b, match=None):
173 def _computeforwardmissing(a, b, match=None):
174 """Computes which files are in b but not a.
174 """Computes which files are in b but not a.
175 This is its own function so extensions can easily wrap this call to see what
175 This is its own function so extensions can easily wrap this call to see what
176 files _forwardcopies is about to process.
176 files _forwardcopies is about to process.
177 """
177 """
178 ma = a.manifest()
178 ma = a.manifest()
179 mb = b.manifest()
179 mb = b.manifest()
180 return mb.filesnotin(ma, match=match)
180 return mb.filesnotin(ma, match=match)
181
181
182 def usechangesetcentricalgo(repo):
182 def usechangesetcentricalgo(repo):
183 """Checks if we should use changeset-centric copy algorithms"""
183 """Checks if we should use changeset-centric copy algorithms"""
184 return (repo.ui.config('experimental', 'copies.read-from') in
184 return (repo.ui.config('experimental', 'copies.read-from') in
185 ('changeset-only', 'compatibility'))
185 ('changeset-only', 'compatibility'))
186
186
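A config sketch for the switch checked above; only the two values named in the code select the changeset-centric algorithms:

    [experimental]
    copies.read-from = compatibility    # or: changeset-only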
187 def _committedforwardcopies(a, b, match):
187 def _committedforwardcopies(a, b, match):
188 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
188 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
189 # files might have to be traced back to the fctx parent of the last
189 # files might have to be traced back to the fctx parent of the last
190 # one-side-only changeset, but not further back than that
190 # one-side-only changeset, but not further back than that
191 repo = a._repo
191 repo = a._repo
192
192
193 if usechangesetcentricalgo(repo):
193 if usechangesetcentricalgo(repo):
194 return _changesetforwardcopies(a, b, match)
194 return _changesetforwardcopies(a, b, match)
195
195
196 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
196 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
197 dbg = repo.ui.debug
197 dbg = repo.ui.debug
198 if debug:
198 if debug:
199 dbg('debug.copies: looking into rename from %s to %s\n'
199 dbg('debug.copies: looking into rename from %s to %s\n'
200 % (a, b))
200 % (a, b))
201 limit = _findlimit(repo, a, b)
201 limit = _findlimit(repo, a, b)
202 if debug:
202 if debug:
203 dbg('debug.copies: search limit: %d\n' % limit)
203 dbg('debug.copies: search limit: %d\n' % limit)
204 am = a.manifest()
204 am = a.manifest()
205
205
206 # find where new files came from
206 # find where new files came from
207 # we currently don't try to find where old files went, too expensive
207 # we currently don't try to find where old files went, too expensive
208 # this means we can miss a case like 'hg rm b; hg cp a b'
208 # this means we can miss a case like 'hg rm b; hg cp a b'
209 cm = {}
209 cm = {}
210
210
211 # Computing the forward missing is quite expensive on large manifests, since
211 # Computing the forward missing is quite expensive on large manifests, since
212 # it compares the entire manifests. We can optimize it in the common use
212 # it compares the entire manifests. We can optimize it in the common use
213 # case of computing what copies are in a commit versus its parent (like
213 # case of computing what copies are in a commit versus its parent (like
214 # during a rebase or histedit). Note, we exclude merge commits from this
214 # during a rebase or histedit). Note, we exclude merge commits from this
215 # optimization, since the ctx.files() for a merge commit is not correct for
215 # optimization, since the ctx.files() for a merge commit is not correct for
216 # this comparison.
216 # this comparison.
217 forwardmissingmatch = match
217 forwardmissingmatch = match
218 if b.p1() == a and b.p2().node() == node.nullid:
218 if b.p1() == a and b.p2().node() == node.nullid:
219 filesmatcher = matchmod.exact(b.files())
219 filesmatcher = matchmod.exact(b.files())
220 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
220 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
221 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
221 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
222
222
223 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
223 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
224
224
225 if debug:
225 if debug:
226 dbg('debug.copies: missing files to search: %d\n' % len(missing))
226 dbg('debug.copies: missing files to search: %d\n' % len(missing))
227
227
228 for f in sorted(missing):
228 for f in sorted(missing):
229 if debug:
229 if debug:
230 dbg('debug.copies: tracing file: %s\n' % f)
230 dbg('debug.copies: tracing file: %s\n' % f)
231 fctx = b[f]
231 fctx = b[f]
232 fctx._ancestrycontext = ancestrycontext
232 fctx._ancestrycontext = ancestrycontext
233
233
234 if debug:
234 if debug:
235 start = util.timer()
235 start = util.timer()
236 ofctx = _tracefile(fctx, am, limit)
236 ofctx = _tracefile(fctx, am, limit)
237 if ofctx:
237 if ofctx:
238 if debug:
238 if debug:
239 dbg('debug.copies: rename of: %s\n' % ofctx._path)
239 dbg('debug.copies: rename of: %s\n' % ofctx._path)
240 cm[f] = ofctx.path()
240 cm[f] = ofctx.path()
241 if debug:
241 if debug:
242 dbg('debug.copies: time: %f seconds\n'
242 dbg('debug.copies: time: %f seconds\n'
243 % (util.timer() - start))
243 % (util.timer() - start))
244 return cm
244 return cm
245
245
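The "debug.copies:" messages above are printed only when both the --debug flag and a devel option are set; a sketch of enabling them, based on the keys read in this function:

    [devel]
    debug.copies = yes
    # then run a copy-aware command with --debug to see the tracing output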
246 def _changesetforwardcopies(a, b, match):
246 def _changesetforwardcopies(a, b, match):
247 if a.rev() == node.nullrev:
247 if a.rev() == node.nullrev:
248 return {}
248 return {}
249
249
250 repo = a.repo()
250 repo = a.repo()
251 children = {}
251 children = {}
252 cl = repo.changelog
252 cl = repo.changelog
253 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
253 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
254 for r in missingrevs:
254 for r in missingrevs:
255 for p in cl.parentrevs(r):
255 for p in cl.parentrevs(r):
256 if p == node.nullrev:
256 if p == node.nullrev:
257 continue
257 continue
258 if p not in children:
258 if p not in children:
259 children[p] = [r]
259 children[p] = [r]
260 else:
260 else:
261 children[p].append(r)
261 children[p].append(r)
262
262
263 roots = set(children) - set(missingrevs)
263 roots = set(children) - set(missingrevs)
264 # 'work' contains 3-tuples of a (revision number, parent number, copies).
264 # 'work' contains 3-tuples of a (revision number, parent number, copies).
265 # The parent number is only used for knowing which parent the copies dict
265 # The parent number is only used for knowing which parent the copies dict
266 # came from.
266 # came from.
267 work = [(r, 1, {}) for r in roots]
267 work = [(r, 1, {}) for r in roots]
268 heapq.heapify(work)
268 heapq.heapify(work)
269 while work:
269 while work:
270 r, i1, copies1 = heapq.heappop(work)
270 r, i1, copies1 = heapq.heappop(work)
271 if work and work[0][0] == r:
271 if work and work[0][0] == r:
272 # We are tracing copies from both parents
272 # We are tracing copies from both parents
273 r, i2, copies2 = heapq.heappop(work)
273 r, i2, copies2 = heapq.heappop(work)
274 copies = {}
274 copies = {}
275 ctx = repo[r]
275 ctx = repo[r]
276 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
276 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
277 allcopies = set(copies1) | set(copies2)
277 allcopies = set(copies1) | set(copies2)
278 # TODO: perhaps this filtering should be done as long as ctx
278 # TODO: perhaps this filtering should be done as long as ctx
279 # is a merge, whether or not we're tracing from both parents.
279 # is a merge, whether or not we're tracing from both parents.
280 for dst in allcopies:
280 for dst in allcopies:
281 if not match(dst):
281 if not match(dst):
282 continue
282 continue
283 if dst not in copies2:
283 if dst not in copies2:
284 # Copied on p1 side: mark as copy from p1 side if it didn't
284 # Copied on p1 side: mark as copy from p1 side if it didn't
285 # already exist on p2 side
285 # already exist on p2 side
286 if dst not in p2man:
286 if dst not in p2man:
287 copies[dst] = copies1[dst]
287 copies[dst] = copies1[dst]
288 elif dst not in copies1:
288 elif dst not in copies1:
289 # Copied on p2 side: mark as copy from p2 side if it didn't
289 # Copied on p2 side: mark as copy from p2 side if it didn't
290 # already exist on p1 side
290 # already exist on p1 side
291 if dst not in p1man:
291 if dst not in p1man:
292 copies[dst] = copies2[dst]
292 copies[dst] = copies2[dst]
293 else:
293 else:
294 # Copied on both sides: mark as copy from p1 side
294 # Copied on both sides: mark as copy from p1 side
295 copies[dst] = copies1[dst]
295 copies[dst] = copies1[dst]
296 else:
296 else:
297 copies = copies1
297 copies = copies1
298 if r == b.rev():
298 if r == b.rev():
299 return copies
299 return copies
300 for c in children[r]:
300 for c in children[r]:
301 childctx = repo[c]
301 childctx = repo[c]
302 if r == childctx.p1().rev():
302 if r == childctx.p1().rev():
303 parent = 1
303 parent = 1
304 childcopies = childctx.p1copies()
304 childcopies = childctx.p1copies()
305 else:
305 else:
306 assert r == childctx.p2().rev()
306 assert r == childctx.p2().rev()
307 parent = 2
307 parent = 2
308 childcopies = childctx.p2copies()
308 childcopies = childctx.p2copies()
309 if not match.always():
309 if not match.always():
310 childcopies = {dst: src for dst, src in childcopies.items()
310 childcopies = {dst: src for dst, src in childcopies.items()
311 if match(dst)}
311 if match(dst)}
312 childcopies = _chain(a, childctx, copies, childcopies)
312 childcopies = _chain(a, childctx, copies, childcopies)
313 heapq.heappush(work, (c, parent, childcopies))
313 heapq.heappush(work, (c, parent, childcopies))
314 assert False
314 assert False
315
315
316 def _forwardcopies(a, b, match=None):
316 def _forwardcopies(a, b, match=None):
317 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
317 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
318
318
319 match = a.repo().narrowmatch(match)
319 match = a.repo().narrowmatch(match)
320 # check for working copy
320 # check for working copy
321 if b.rev() is None:
321 if b.rev() is None:
322 if a == b.p1():
322 if a == b.p1():
323 # short-circuit to avoid issues with merge states
323 # short-circuit to avoid issues with merge states
324 return _dirstatecopies(b._repo, match)
324 return _dirstatecopies(b._repo, match)
325
325
326 cm = _committedforwardcopies(a, b.p1(), match)
326 cm = _committedforwardcopies(a, b.p1(), match)
327 # combine copies from dirstate if necessary
327 # combine copies from dirstate if necessary
328 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
328 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
329 return _committedforwardcopies(a, b, match)
329 return _committedforwardcopies(a, b, match)
330
330
331 def _backwardrenames(a, b, match):
331 def _backwardrenames(a, b, match):
332 if a._repo.ui.config('experimental', 'copytrace') == 'off':
332 if a._repo.ui.config('experimental', 'copytrace') == 'off':
333 return {}
333 return {}
334
334
335 # Even though we're not taking copies into account, 1:n rename situations
335 # Even though we're not taking copies into account, 1:n rename situations
336 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
336 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
337 # arbitrarily pick one of the renames.
337 # arbitrarily pick one of the renames.
338 # We don't want to pass in "match" here, since that would filter
338 # We don't want to pass in "match" here, since that would filter
339 # the destination by it. Since we're reversing the copies, we want
339 # the destination by it. Since we're reversing the copies, we want
340 # to filter the source instead.
340 # to filter the source instead.
341 f = _forwardcopies(b, a)
341 f = _forwardcopies(b, a)
342 r = {}
342 r = {}
343 for k, v in sorted(f.iteritems()):
343 for k, v in sorted(f.iteritems()):
344 if match and not match(v):
344 if match and not match(v):
345 continue
345 continue
346 # remove copies
346 # remove copies
347 if v in a:
347 if v in a:
348 continue
348 continue
349 r[v] = k
349 r[v] = k
350 return r
350 return r
351
351
352 def pathcopies(x, y, match=None):
352 def pathcopies(x, y, match=None):
353 """find {dst@y: src@x} copy mapping for directed compare"""
353 """find {dst@y: src@x} copy mapping for directed compare"""
354 repo = x._repo
354 repo = x._repo
355 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
355 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
356 if debug:
356 if debug:
357 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
357 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
358 % (x, y))
358 % (x, y))
359 if x == y or not x or not y:
359 if x == y or not x or not y:
360 return {}
360 return {}
361 a = y.ancestor(x)
361 a = y.ancestor(x)
362 if a == x:
362 if a == x:
363 if debug:
363 if debug:
364 repo.ui.debug('debug.copies: search mode: forward\n')
364 repo.ui.debug('debug.copies: search mode: forward\n')
365 return _forwardcopies(x, y, match=match)
365 return _forwardcopies(x, y, match=match)
366 if a == y:
366 if a == y:
367 if debug:
367 if debug:
368 repo.ui.debug('debug.copies: search mode: backward\n')
368 repo.ui.debug('debug.copies: search mode: backward\n')
369 return _backwardrenames(x, y, match=match)
369 return _backwardrenames(x, y, match=match)
370 if debug:
370 if debug:
371 repo.ui.debug('debug.copies: search mode: combined\n')
371 repo.ui.debug('debug.copies: search mode: combined\n')
372 return _chain(x, y, _backwardrenames(x, a, match=match),
372 return _chain(x, y, _backwardrenames(x, a, match=match),
373 _forwardcopies(a, y, match=match))
373 _forwardcopies(a, y, match=match))
374
374
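A hypothetical usage sketch for the public entry point above, assuming a repo object such as an extension would have (revision names are placeholders):

    from mercurial import copies

    x = repo['.']
    y = repo['tip']
    for dst, src in copies.pathcopies(x, y).items():
        print('%s came from %s' % (dst, src))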
375 def mergecopies(repo, c1, c2, base):
375 def mergecopies(repo, c1, c2, base):
376 """
376 """
377 Finds moves and copies between context c1 and c2 that are relevant for
377 Finds moves and copies between context c1 and c2 that are relevant for
378 merging. 'base' will be used as the merge base.
378 merging. 'base' will be used as the merge base.
379
379
380 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
380 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
381 files that were moved/copied in one merge parent and modified in another.
381 files that were moved/copied in one merge parent and modified in another.
382 For example:
382 For example:
383
383
384 o ---> 4 another commit
384 o ---> 4 another commit
385 |
385 |
386 | o ---> 3 commit that modifies a.txt
386 | o ---> 3 commit that modifies a.txt
387 | /
387 | /
388 o / ---> 2 commit that moves a.txt to b.txt
388 o / ---> 2 commit that moves a.txt to b.txt
389 |/
389 |/
390 o ---> 1 merge base
390 o ---> 1 merge base
391
391
392 If we try to rebase revision 3 on revision 4, since there is no a.txt in
392 If we try to rebase revision 3 on revision 4, since there is no a.txt in
393 revision 4, and if the user has copytrace disabled, we print the following
393 revision 4, and if the user has copytrace disabled, we print the following
394 message:
394 message:
395
395
396 ```other changed <file> which local deleted```
396 ```other changed <file> which local deleted```
397
397
398 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
398 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
399 "dirmove".
399 "dirmove".
400
400
401 "copy" is a mapping from destination name -> source name,
401 "copy" is a mapping from destination name -> source name,
402 where source is in c1 and destination is in c2 or vice-versa.
402 where source is in c1 and destination is in c2 or vice-versa.
403
403
404 "movewithdir" is a mapping from source name -> destination name,
404 "movewithdir" is a mapping from source name -> destination name,
405 where the file at source present in one context but not the other
405 where the file at source present in one context but not the other
406 needs to be moved to destination by the merge process, because the
406 needs to be moved to destination by the merge process, because the
407 other context moved the directory it is in.
407 other context moved the directory it is in.
408
408
409 "diverge" is a mapping of source name -> list of destination names
409 "diverge" is a mapping of source name -> list of destination names
410 for divergent renames.
410 for divergent renames.
411
411
412 "renamedelete" is a mapping of source name -> list of destination
412 "renamedelete" is a mapping of source name -> list of destination
413 names for files deleted in c1 that were renamed in c2 or vice-versa.
413 names for files deleted in c1 that were renamed in c2 or vice-versa.
414
414
415 "dirmove" is a mapping of detected source dir -> destination dir renames.
415 "dirmove" is a mapping of detected source dir -> destination dir renames.
416 This is needed for handling changes to new files previously grafted into
416 This is needed for handling changes to new files previously grafted into
417 renamed directories.
417 renamed directories.
418
418
419 This function calls different copytracing algorithms based on config.
419 This function calls different copytracing algorithms based on config.
420 """
420 """
421 # avoid silly behavior for update from empty dir
421 # avoid silly behavior for update from empty dir
422 if not c1 or not c2 or c1 == c2:
422 if not c1 or not c2 or c1 == c2:
423 return {}, {}, {}, {}, {}
423 return {}, {}, {}, {}, {}
424
424
425 narrowmatch = c1.repo().narrowmatch()
425 narrowmatch = c1.repo().narrowmatch()
426
426
427 # avoid silly behavior for parent -> working dir
427 # avoid silly behavior for parent -> working dir
428 if c2.node() is None and c1.node() == repo.dirstate.p1():
428 if c2.node() is None and c1.node() == repo.dirstate.p1():
429 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
429 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
430
430
431 copytracing = repo.ui.config('experimental', 'copytrace')
431 copytracing = repo.ui.config('experimental', 'copytrace')
432 if stringutil.parsebool(copytracing) is False:
432 if stringutil.parsebool(copytracing) is False:
433 # stringutil.parsebool() returns None when it is unable to parse the
433 # stringutil.parsebool() returns None when it is unable to parse the
434 # value, so copytracing is left enabled in such cases
434 # value, so copytracing is left enabled in such cases
435 return {}, {}, {}, {}, {}
435 return {}, {}, {}, {}, {}
436
436
437 if usechangesetcentricalgo(repo):
437 if usechangesetcentricalgo(repo):
438 # The heuristics don't make sense when we need changeset-centric algos
438 # The heuristics don't make sense when we need changeset-centric algos
439 return _fullcopytracing(repo, c1, c2, base)
439 return _fullcopytracing(repo, c1, c2, base)
440
440
441 # Copy trace disabling is explicitly below the node == p1 logic above
441 # Copy trace disabling is explicitly below the node == p1 logic above
442 # because the logic above is required for a simple copy to be kept across a
442 # because the logic above is required for a simple copy to be kept across a
443 # rebase.
443 # rebase.
444 if copytracing == 'heuristics':
444 if copytracing == 'heuristics':
445 # Do full copytracing if only non-public revisions are involved as
445 # Do full copytracing if only non-public revisions are involved as
446 # that will be fast enough and will also cover the copies which could
446 # that will be fast enough and will also cover the copies which could
447 # be missed by heuristics
447 # be missed by heuristics
448 if _isfullcopytraceable(repo, c1, base):
448 if _isfullcopytraceable(repo, c1, base):
449 return _fullcopytracing(repo, c1, c2, base)
449 return _fullcopytracing(repo, c1, c2, base)
450 return _heuristicscopytracing(repo, c1, c2, base)
450 return _heuristicscopytracing(repo, c1, c2, base)
451 else:
451 else:
452 return _fullcopytracing(repo, c1, c2, base)
452 return _fullcopytracing(repo, c1, c2, base)
453
453
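A sketch of the call shape described in the mergecopies() docstring above (names are illustrative):

    copy, movewithdir, diverge, renamedelete, dirmove = mergecopies(
        repo, c1, c2, base)
    # copy:         {dst: src}        copies/renames relevant to the merge
    # movewithdir:  {src: dst}        files dragged along by a directory rename
    # diverge:      {src: [dst, ...]} one source renamed differently on each side
    # renamedelete: {src: [dst, ...]} renamed on one side, deleted on the other
    # dirmove:      {srcdir: dstdir}  detected directory renames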
454 def _isfullcopytraceable(repo, c1, base):
454 def _isfullcopytraceable(repo, c1, base):
455 """ Checks that if base, source and destination are all no-public branches,
455 """ Checks that if base, source and destination are all no-public branches,
456 if yes let's use the full copytrace algorithm for increased capabilities
456 if yes let's use the full copytrace algorithm for increased capabilities
457 since it will be fast enough.
457 since it will be fast enough.
458
458
459 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
459 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
460 the number of changesets from c1 to base; if there are more changesets than
460 the number of changesets from c1 to base; if there are more changesets than
461 the limit, the full copytracing algorithm won't be used.
461 the limit, the full copytracing algorithm won't be used.
462 """
462 """
463 if c1.rev() is None:
463 if c1.rev() is None:
464 c1 = c1.p1()
464 c1 = c1.p1()
465 if c1.mutable() and base.mutable():
465 if c1.mutable() and base.mutable():
466 sourcecommitlimit = repo.ui.configint('experimental',
466 sourcecommitlimit = repo.ui.configint('experimental',
467 'copytrace.sourcecommitlimit')
467 'copytrace.sourcecommitlimit')
468 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
468 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
469 return commits < sourcecommitlimit
469 return commits < sourcecommitlimit
470 return False
470 return False
471
471
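A config sketch for the limit mentioned above (the value is arbitrary, chosen only for illustration):

    [experimental]
    copytrace = heuristics
    copytrace.sourcecommitlimit = 50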
472 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
472 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
473 copy, renamedelete):
473 copy, renamedelete):
474 if src not in m2:
474 if src not in m2:
475 # deleted on side 2
475 # deleted on side 2
476 if src not in m1:
476 if src not in m1:
477 # renamed on side 1, deleted on side 2
477 # renamed on side 1, deleted on side 2
478 renamedelete[src] = dsts1
478 renamedelete[src] = dsts1
479 elif m2[src] != mb[src]:
479 elif m2[src] != mb[src]:
480 if not _related(c2[src], base[src]):
480 if not _related(c2[src], base[src]):
481 return
481 return
482 # modified on side 2
482 # modified on side 2
483 for dst in dsts1:
483 for dst in dsts1:
484 if dst not in m2:
484 if dst not in m2:
485 # dst not added on side 2 (handle as regular
485 # dst not added on side 2 (handle as regular
486 # "both created" case in manifestmerge otherwise)
486 # "both created" case in manifestmerge otherwise)
487 copy[dst] = src
487 copy[dst] = src
488
488
489 def _fullcopytracing(repo, c1, c2, base):
489 def _fullcopytracing(repo, c1, c2, base):
490 """ The full copytracing algorithm which finds all the new files that were
490 """ The full copytracing algorithm which finds all the new files that were
491 added from merge base up to the top commit and for each file it checks if
491 added from merge base up to the top commit and for each file it checks if
492 this file was copied from another file.
492 this file was copied from another file.
493
493
494 This is pretty slow when a lot of changesets are involved but will track all
494 This is pretty slow when a lot of changesets are involved but will track all
495 the copies.
495 the copies.
496 """
496 """
497 m1 = c1.manifest()
497 m1 = c1.manifest()
498 m2 = c2.manifest()
498 m2 = c2.manifest()
499 mb = base.manifest()
499 mb = base.manifest()
500
500
501 copies1 = pathcopies(base, c1)
501 copies1 = pathcopies(base, c1)
502 copies2 = pathcopies(base, c2)
502 copies2 = pathcopies(base, c2)
503
503
504 inversecopies1 = {}
504 inversecopies1 = {}
505 inversecopies2 = {}
505 inversecopies2 = {}
506 for dst, src in copies1.items():
506 for dst, src in copies1.items():
507 inversecopies1.setdefault(src, []).append(dst)
507 inversecopies1.setdefault(src, []).append(dst)
508 for dst, src in copies2.items():
508 for dst, src in copies2.items():
509 inversecopies2.setdefault(src, []).append(dst)
509 inversecopies2.setdefault(src, []).append(dst)
510
510
511 copy = {}
511 copy = {}
512 diverge = {}
512 diverge = {}
513 renamedelete = {}
513 renamedelete = {}
514 allsources = set(inversecopies1) | set(inversecopies2)
514 allsources = set(inversecopies1) | set(inversecopies2)
515 for src in allsources:
515 for src in allsources:
516 dsts1 = inversecopies1.get(src)
516 dsts1 = inversecopies1.get(src)
517 dsts2 = inversecopies2.get(src)
517 dsts2 = inversecopies2.get(src)
518 if dsts1 and dsts2:
518 if dsts1 and dsts2:
519 # copied/renamed on both sides
519 # copied/renamed on both sides
520 if src not in m1 and src not in m2:
520 if src not in m1 and src not in m2:
521 # renamed on both sides
521 # renamed on both sides
522 dsts1 = set(dsts1)
522 dsts1 = set(dsts1)
523 dsts2 = set(dsts2)
523 dsts2 = set(dsts2)
524 # If there's some overlap in the rename destinations, we
524 # If there's some overlap in the rename destinations, we
525 # consider it not divergent. For example, if side 1 copies 'a'
525 # consider it not divergent. For example, if side 1 copies 'a'
526 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
526 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
527 # and 'd' and deletes 'a'.
527 # and 'd' and deletes 'a'.
528 if dsts1 & dsts2:
528 if dsts1 & dsts2:
529 for dst in (dsts1 & dsts2):
529 for dst in (dsts1 & dsts2):
530 copy[dst] = src
530 copy[dst] = src
531 else:
531 else:
532 diverge[src] = sorted(dsts1 | dsts2)
532 diverge[src] = sorted(dsts1 | dsts2)
533 elif src in m1 and src in m2:
533 elif src in m1 and src in m2:
534 # copied on both sides
534 # copied on both sides
535 dsts1 = set(dsts1)
535 dsts1 = set(dsts1)
536 dsts2 = set(dsts2)
536 dsts2 = set(dsts2)
537 for dst in (dsts1 & dsts2):
537 for dst in (dsts1 & dsts2):
538 copy[dst] = src
538 copy[dst] = src
539 # TODO: Handle cases where it was renamed on one side and copied
539 # TODO: Handle cases where it was renamed on one side and copied
540 # on the other side
540 # on the other side
541 elif dsts1:
541 elif dsts1:
542 # copied/renamed only on side 1
542 # copied/renamed only on side 1
543 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
543 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
544 copy, renamedelete)
544 copy, renamedelete)
545 elif dsts2:
545 elif dsts2:
546 # copied/renamed only on side 2
546 # copied/renamed only on side 2
547 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
547 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
548 copy, renamedelete)
548 copy, renamedelete)
549
549
550 renamedeleteset = set()
550 renamedeleteset = set()
551 divergeset = set()
551 divergeset = set()
552 for dsts in diverge.values():
552 for dsts in diverge.values():
553 divergeset.update(dsts)
553 divergeset.update(dsts)
554 for dsts in renamedelete.values():
554 for dsts in renamedelete.values():
555 renamedeleteset.update(dsts)
555 renamedeleteset.update(dsts)
556
556
557 # find interesting file sets from manifests
557 # find interesting file sets from manifests
558 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
558 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
559 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
559 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
560 u1 = sorted(addedinm1 - addedinm2)
560 u1 = sorted(addedinm1 - addedinm2)
561 u2 = sorted(addedinm2 - addedinm1)
561 u2 = sorted(addedinm2 - addedinm1)
562
562
563 header = " unmatched files in %s"
563 header = " unmatched files in %s"
564 if u1:
564 if u1:
565 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
565 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
566 if u2:
566 if u2:
567 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
567 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
568
568
569 fullcopy = copies1.copy()
569 fullcopy = copies1.copy()
570 fullcopy.update(copies2)
570 fullcopy.update(copies2)
571 if not fullcopy:
571 if not fullcopy:
572 return copy, {}, diverge, renamedelete, {}
572 return copy, {}, diverge, renamedelete, {}
573
573
574 if repo.ui.debugflag:
574 if repo.ui.debugflag:
575 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
575 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
576 "% = renamed and deleted):\n")
576 "% = renamed and deleted):\n")
577 for f in sorted(fullcopy):
577 for f in sorted(fullcopy):
578 note = ""
578 note = ""
579 if f in copy:
579 if f in copy:
580 note += "*"
580 note += "*"
581 if f in divergeset:
581 if f in divergeset:
582 note += "!"
582 note += "!"
583 if f in renamedeleteset:
583 if f in renamedeleteset:
584 note += "%"
584 note += "%"
585 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
585 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
586 note))
586 note))
587 del divergeset
587 del divergeset
588
588
589 repo.ui.debug(" checking for directory renames\n")
589 repo.ui.debug(" checking for directory renames\n")
590
590
591 # generate a directory move map
591 # generate a directory move map
592 d1, d2 = c1.dirs(), c2.dirs()
592 d1, d2 = c1.dirs(), c2.dirs()
593 # Hack for adding '', which is not otherwise added, to d1 and d2
593 # Hack for adding '', which is not otherwise added, to d1 and d2
594 d1.addpath('/')
594 d1.addpath('/')
595 d2.addpath('/')
595 d2.addpath('/')
596 invalid = set()
596 invalid = set()
597 dirmove = {}
597 dirmove = {}
598
598
599 # examine each file copy for a potential directory move, which is
599 # examine each file copy for a potential directory move, which is
600 # when all the files in a directory are moved to a new directory
600 # when all the files in a directory are moved to a new directory
601 for dst, src in fullcopy.iteritems():
601 for dst, src in fullcopy.iteritems():
602 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
602 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 if dsrc in invalid:
603 if dsrc in invalid:
604 # already seen to be uninteresting
604 # already seen to be uninteresting
605 continue
605 continue
606 elif dsrc in d1 and ddst in d1:
606 elif dsrc in d1 and ddst in d1:
607 # directory wasn't entirely moved locally
607 # directory wasn't entirely moved locally
608 invalid.add(dsrc)
608 invalid.add(dsrc)
609 elif dsrc in d2 and ddst in d2:
609 elif dsrc in d2 and ddst in d2:
610 # directory wasn't entirely moved remotely
610 # directory wasn't entirely moved remotely
611 invalid.add(dsrc)
611 invalid.add(dsrc)
612 elif dsrc in dirmove and dirmove[dsrc] != ddst:
612 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 # files from the same directory moved to two different places
613 # files from the same directory moved to two different places
614 invalid.add(dsrc)
614 invalid.add(dsrc)
615 else:
615 else:
616 # looks good so far
616 # looks good so far
617 dirmove[dsrc] = ddst
617 dirmove[dsrc] = ddst
618
618
619 for i in invalid:
619 for i in invalid:
620 if i in dirmove:
620 if i in dirmove:
621 del dirmove[i]
621 del dirmove[i]
622 del d1, d2, invalid
622 del d1, d2, invalid
623
623
624 if not dirmove:
624 if not dirmove:
625 return copy, {}, diverge, renamedelete, {}
625 return copy, {}, diverge, renamedelete, {}
626
626
627 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
627 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628
628
629 for d in dirmove:
629 for d in dirmove:
630 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
630 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 (d, dirmove[d]))
631 (d, dirmove[d]))
632
632
633 movewithdir = {}
633 movewithdir = {}
634 # check unaccounted nonoverlapping files against directory moves
634 # check unaccounted nonoverlapping files against directory moves
635 for f in u1 + u2:
635 for f in u1 + u2:
636 if f not in fullcopy:
636 if f not in fullcopy:
637 for d in dirmove:
637 for d in dirmove:
638 if f.startswith(d):
638 if f.startswith(d):
639 # new file added in a directory that was moved, move it
639 # new file added in a directory that was moved, move it
640 df = dirmove[d] + f[len(d):]
640 df = dirmove[d] + f[len(d):]
641 if df not in copy:
641 if df not in copy:
642 movewithdir[f] = df
642 movewithdir[f] = df
643 repo.ui.debug((" pending file src: '%s' -> "
643 repo.ui.debug((" pending file src: '%s' -> "
644 "dst: '%s'\n") % (f, df))
644 "dst: '%s'\n") % (f, df))
645 break
645 break
646
646
647 return copy, movewithdir, diverge, renamedelete, dirmove
647 return copy, movewithdir, diverge, renamedelete, dirmove
648
648
649 def _heuristicscopytracing(repo, c1, c2, base):
649 def _heuristicscopytracing(repo, c1, c2, base):
650 """ Fast copytracing using filename heuristics
650 """ Fast copytracing using filename heuristics
651
651
652 Assumes that moves or renames are of following two types:
652 Assumes that moves or renames are of following two types:
653
653
654 1) Inside a directory only (same directory name but different filenames)
654 1) Inside a directory only (same directory name but different filenames)
655 2) Move from one directory to another
655 2) Move from one directory to another
656 (same filenames but different directory names)
656 (same filenames but different directory names)
657
657
658 Works only when there are no merge commits in the "source branch".
658 Works only when there are no merge commits in the "source branch".
659 Source branch is commits from base up to c2 not including base.
659 Source branch is commits from base up to c2 not including base.
660
660
661 If a merge is involved it falls back to _fullcopytracing().
661 If a merge is involved it falls back to _fullcopytracing().
662
662
663 Can be used by setting the following config:
663 Can be used by setting the following config:
664
664
665 [experimental]
665 [experimental]
666 copytrace = heuristics
666 copytrace = heuristics
667
667
668 In some cases the copy/move candidates found by heuristics can be very large
668 In some cases the copy/move candidates found by heuristics can be very large
669 in number and that will make the algorithm slow. The number of possible
669 in number and that will make the algorithm slow. The number of possible
670 candidates to check can be limited by using the config
670 candidates to check can be limited by using the config
671 `experimental.copytrace.movecandidateslimit` which defaults to 100.
671 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 """
672 """
673
673
674 if c1.rev() is None:
674 if c1.rev() is None:
675 c1 = c1.p1()
675 c1 = c1.p1()
676 if c2.rev() is None:
676 if c2.rev() is None:
677 c2 = c2.p1()
677 c2 = c2.p1()
678
678
679 copies = {}
679 copies = {}
680
680
681 changedfiles = set()
681 changedfiles = set()
682 m1 = c1.manifest()
682 m1 = c1.manifest()
683 if not repo.revs('%d::%d', base.rev(), c2.rev()):
683 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 # If base is not in c2 branch, we switch to fullcopytracing
684 # If base is not in c2 branch, we switch to fullcopytracing
685 repo.ui.debug("switching to full copytracing as base is not "
685 repo.ui.debug("switching to full copytracing as base is not "
686 "an ancestor of c2\n")
686 "an ancestor of c2\n")
687 return _fullcopytracing(repo, c1, c2, base)
687 return _fullcopytracing(repo, c1, c2, base)
688
688
689 ctx = c2
689 ctx = c2
690 while ctx != base:
690 while ctx != base:
691 if len(ctx.parents()) == 2:
691 if len(ctx.parents()) == 2:
692 # To keep things simple let's not handle merges
692 # To keep things simple let's not handle merges
693 repo.ui.debug("switching to full copytracing because of merges\n")
693 repo.ui.debug("switching to full copytracing because of merges\n")
694 return _fullcopytracing(repo, c1, c2, base)
694 return _fullcopytracing(repo, c1, c2, base)
695 changedfiles.update(ctx.files())
695 changedfiles.update(ctx.files())
696 ctx = ctx.p1()
696 ctx = ctx.p1()
697
697
698 cp = _forwardcopies(base, c2)
698 cp = _forwardcopies(base, c2)
699 for dst, src in cp.iteritems():
699 for dst, src in cp.iteritems():
700 if src in m1:
700 if src in m1:
701 copies[dst] = src
701 copies[dst] = src
702
702
703 # file is missing if it isn't present in the destination, but is present in
703 # file is missing if it isn't present in the destination, but is present in
704 # the base and present in the source.
704 # the base and present in the source.
705 # Presence in the base is important to exclude added files, presence in the
705 # Presence in the base is important to exclude added files, presence in the
706 # source is important to exclude removed files.
706 # source is important to exclude removed files.
707 filt = lambda f: f not in m1 and f in base and f in c2
707 filt = lambda f: f not in m1 and f in base and f in c2
708 missingfiles = [f for f in changedfiles if filt(f)]
708 missingfiles = [f for f in changedfiles if filt(f)]
709
709
710 if missingfiles:
710 if missingfiles:
711 basenametofilename = collections.defaultdict(list)
711 basenametofilename = collections.defaultdict(list)
712 dirnametofilename = collections.defaultdict(list)
712 dirnametofilename = collections.defaultdict(list)
713
713
714 for f in m1.filesnotin(base.manifest()):
714 for f in m1.filesnotin(base.manifest()):
715 basename = os.path.basename(f)
715 basename = os.path.basename(f)
716 dirname = os.path.dirname(f)
716 dirname = os.path.dirname(f)
717 basenametofilename[basename].append(f)
717 basenametofilename[basename].append(f)
718 dirnametofilename[dirname].append(f)
718 dirnametofilename[dirname].append(f)
719
719
720 for f in missingfiles:
720 for f in missingfiles:
721 basename = os.path.basename(f)
721 basename = os.path.basename(f)
722 dirname = os.path.dirname(f)
722 dirname = os.path.dirname(f)
723 samebasename = basenametofilename[basename]
723 samebasename = basenametofilename[basename]
724 samedirname = dirnametofilename[dirname]
724 samedirname = dirnametofilename[dirname]
725 movecandidates = samebasename + samedirname
725 movecandidates = samebasename + samedirname
726 # f is guaranteed to be present in c2, that's why
726 # f is guaranteed to be present in c2, that's why
727 # c2.filectx(f) won't fail
727 # c2.filectx(f) won't fail
728 f2 = c2.filectx(f)
728 f2 = c2.filectx(f)
729 # we can have a lot of candidates which can slow down the heuristics
729 # we can have a lot of candidates which can slow down the heuristics
730 # config value to limit the number of candidates moves to check
730 # config value to limit the number of candidates moves to check
731 maxcandidates = repo.ui.configint('experimental',
731 maxcandidates = repo.ui.configint('experimental',
732 'copytrace.movecandidateslimit')
732 'copytrace.movecandidateslimit')
733
733
734 if len(movecandidates) > maxcandidates:
734 if len(movecandidates) > maxcandidates:
735 repo.ui.status(_("skipping copytracing for '%s', more "
735 repo.ui.status(_("skipping copytracing for '%s', more "
736 "candidates than the limit: %d\n")
736 "candidates than the limit: %d\n")
737 % (f, len(movecandidates)))
737 % (f, len(movecandidates)))
738 continue
738 continue
739
739
740 for candidate in movecandidates:
740 for candidate in movecandidates:
741 f1 = c1.filectx(candidate)
741 f1 = c1.filectx(candidate)
742 if _related(f1, f2):
742 if _related(f1, f2):
743 # if there are a few related copies then we'll merge
743 # if there are a few related copies then we'll merge
744 # changes into all of them. This matches the behaviour
744 # changes into all of them. This matches the behaviour
745 # of upstream copytracing
745 # of upstream copytracing
746 copies[candidate] = f
746 copies[candidate] = f
747
747
748 return copies, {}, {}, {}, {}
748 return copies, {}, {}, {}, {}
749
749
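Combining the knobs named in the _heuristicscopytracing() docstring above into one config sketch (100 is the default stated there for the candidate limit):

    [experimental]
    copytrace = heuristics
    copytrace.movecandidateslimit = 100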
750 def _related(f1, f2):
750 def _related(f1, f2):
751 """return True if f1 and f2 filectx have a common ancestor
751 """return True if f1 and f2 filectx have a common ancestor
752
752
753 Walk back to common ancestor to see if the two files originate
753 Walk back to common ancestor to see if the two files originate
754 from the same file. Since workingfilectx's rev() is None it messes
754 from the same file. Since workingfilectx's rev() is None it messes
755 up the integer comparison logic, hence the pre-step check for
755 up the integer comparison logic, hence the pre-step check for
756 None (f1 and f2 can only be workingfilectx's initially).
756 None (f1 and f2 can only be workingfilectx's initially).
757 """
757 """
758
758
759 if f1 == f2:
759 if f1 == f2:
760 return True # a match
760 return True # a match
761
761
762 g1, g2 = f1.ancestors(), f2.ancestors()
762 g1, g2 = f1.ancestors(), f2.ancestors()
763 try:
763 try:
764 f1r, f2r = f1.linkrev(), f2.linkrev()
764 f1r, f2r = f1.linkrev(), f2.linkrev()
765
765
766 if f1r is None:
766 if f1r is None:
767 f1 = next(g1)
767 f1 = next(g1)
768 if f2r is None:
768 if f2r is None:
769 f2 = next(g2)
769 f2 = next(g2)
770
770
771 while True:
771 while True:
772 f1r, f2r = f1.linkrev(), f2.linkrev()
772 f1r, f2r = f1.linkrev(), f2.linkrev()
773 if f1r > f2r:
773 if f1r > f2r:
774 f1 = next(g1)
774 f1 = next(g1)
775 elif f2r > f1r:
775 elif f2r > f1r:
776 f2 = next(g2)
776 f2 = next(g2)
777 else: # f1 and f2 point to files in the same linkrev
777 else: # f1 and f2 point to files in the same linkrev
778 return f1 == f2 # true if they point to the same file
778 return f1 == f2 # true if they point to the same file
779 except StopIteration:
779 except StopIteration:
780 return False
780 return False
781
781
782 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
782 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 """reproduce copies from fromrev to rev in the dirstate
783 """reproduce copies from fromrev to rev in the dirstate
784
784
785 If skiprev is specified, it's a revision that should be used to
785 If skiprev is specified, it's a revision that should be used to
786 filter copy records. Any copies that occur between fromrev and
786 filter copy records. Any copies that occur between fromrev and
787 skiprev will not be duplicated, even if they appear in the set of
787 skiprev will not be duplicated, even if they appear in the set of
788 copies between fromrev and rev.
788 copies between fromrev and rev.
789 """
789 """
790 exclude = {}
790 exclude = {}
791 ctraceconfig = repo.ui.config('experimental', 'copytrace')
791 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 bctrace = stringutil.parsebool(ctraceconfig)
792 bctrace = stringutil.parsebool(ctraceconfig)
793 if (skiprev is not None and
793 if (skiprev is not None and
794 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
794 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 # copytrace='off' skips this line, but not the entire function because
795 # copytrace='off' skips this line, but not the entire function because
796 # the line below is O(size of the repo) during a rebase, while the rest
796 # the line below is O(size of the repo) during a rebase, while the rest
797 # of the function is much faster (and is required for carrying copy
797 # of the function is much faster (and is required for carrying copy
798 # metadata across the rebase anyway).
798 # metadata across the rebase anyway).
799 exclude = pathcopies(repo[fromrev], repo[skiprev])
799 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
800 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 # copies.pathcopies returns backward renames, so dst might not
801 # copies.pathcopies returns backward renames, so dst might not
802 # actually be in the dirstate
802 # actually be in the dirstate
803 if dst in exclude:
803 if dst in exclude:
804 continue
804 continue
805 wctx[dst].markcopied(src)
805 wctx[dst].markcopied(src)