##// END OF EJS Templates
copies: remove a redundant matcher filtering in _changesetforwardcopies()...
Martin von Zweigbergk -
r42718:c7104896 default
parent child Browse files
Show More
@@ -1,809 +1,807 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # Strategy:
    # - tag a and b with opposite sides (-1 / +1)
    # - walk ancestors in reverse-topological order using a max-heap
    #   (implemented as a min-heap of negated revnums)
    # - a parent inherits the side of the first child that reaches it; if a
    #   later child disagrees, the parent's side is cleared (0)
    # - count how many pending revisions might still be single-sided and
    #   remember the lowest such revision; stop once the count hits zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    # The working directory has no revnum; node.wdirrev stands in for it,
    # and its parents are remembered so they can be expanded in the walk.
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    heap = [-a, -b]
    heapq.heapify(heap)
    interesting = len(heap)
    lowestvisited = node.wdirrev

    while interesting:
        rev = -heapq.heappop(heap)
        if rev == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(rev)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for par in parents:
            if par not in side:
                # first time we see this parent; it inherits rev's side
                side[par] = side[rev]
                if side[par]:
                    interesting += 1
                heapq.heappush(heap, -par)
            elif side[par] and side[par] != side[rev]:
                # reached from both sides; no longer interesting
                side[par] = 0
                interesting -= 1
        if side[rev]:
            lowestvisited = rev  # lowest one-sided rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(lowestvisited, a, b)
109
109
def _chainandfilter(src, dst, a, b):
    """chain two sets of copies 'a' and 'b' and filter result"""

    # When chaining copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), the non-trivial combinations
    # are listed below. For example, case 2 is where a file existed in 'src'
    # and kept its name in 'mid' and was then renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #   1    x    y    -      -
    #   2    x    y    y     x->y
    #   3    x    y    x      -
    #   4    x    y    z     x->z
    #   5    -    x    y      -
    #   6    x    x    y     x->y
    #
    # _chain() chains 'a' and 'b' but cannot tell cases 1/2, 3/4 or 5/6
    # apart, so it keeps all of them; _filter() then prunes the invalid
    # ones (cases 1, 3 and 5).
    chained = _chain(a, b)
    _filter(src, dst, chained)
    return chained
135
135
136 def _filter(src, dst, t):
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
139 # remove copies from files that didn't exist
140 if v not in src:
140 if v not in src:
141 del t[k]
141 del t[k]
142 # remove criss-crossed copies
142 # remove criss-crossed copies
143 elif k in src and v in dst:
143 elif k in src and v in dst:
144 del t[k]
144 del t[k]
145 # remove copies to files that were then removed
145 # remove copies to files that were then removed
146 elif k not in dst:
146 elif k not in dst:
147 del t[k]
147 del t[k]
148
148
149 def _chain(a, b):
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
150 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
151 t = a.copy()
152 for k, v in b.iteritems():
152 for k, v in b.iteritems():
153 if v in t:
153 if v in t:
154 t[k] = t[v]
154 t[k] = t[v]
155 else:
155 else:
156 t[k] = v
156 t[k] = v
157 return t
157 return t
158
158
159 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
162
162
163 for f in fctx.ancestors():
163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
164 if am.get(f.path(), None) == f.filenode():
165 return f
165 return f
166 if not f.isintroducedafter(limit):
166 if not f.isintroducedafter(limit):
167 return None
167 return None
168
168
169 def _dirstatecopies(repo, match=None):
169 def _dirstatecopies(repo, match=None):
170 ds = repo.dirstate
170 ds = repo.dirstate
171 c = ds.copies().copy()
171 c = ds.copies().copy()
172 for k in list(c):
172 for k in list(c):
173 if ds[k] not in 'anm' or (match and not match(k)):
173 if ds[k] not in 'anm' or (match and not match(k)):
174 del c[k]
174 del c[k]
175 return c
175 return c
176
176
177 def _computeforwardmissing(a, b, match=None):
177 def _computeforwardmissing(a, b, match=None):
178 """Computes which files are in b but not a.
178 """Computes which files are in b but not a.
179 This is its own function so extensions can easily wrap this call to see what
179 This is its own function so extensions can easily wrap this call to see what
180 files _forwardcopies is about to process.
180 files _forwardcopies is about to process.
181 """
181 """
182 ma = a.manifest()
182 ma = a.manifest()
183 mb = b.manifest()
183 mb = b.manifest()
184 return mb.filesnotin(ma, match=match)
184 return mb.filesnotin(ma, match=match)
185
185
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    return readfrom in ('changeset-only', 'compatibility')
190
190
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debugcopies = repo.ui.debugflag and repo.ui.configbool('devel',
                                                           'debug.copies')
    emit = repo.ui.debug
    if debugcopies:
        emit('debug.copies: looking into rename from %s to %s\n'
             % (a, b))
    limit = _findlimit(repo, a, b)
    if debugcopies:
        emit('debug.copies: search limit: %d\n' % limit)
    ancmf = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    fwdmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        exactmatcher = matchmod.exact(b.files())
        fwdmatch = matchmod.intersectmatchers(match, exactmatcher)
    missingfiles = _computeforwardmissing(a, b, match=fwdmatch)

    # Share one ancestry context across all traced files so repeated
    # ancestor walks can reuse work.
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debugcopies:
        emit('debug.copies: missing files to search: %d\n'
             % len(missingfiles))

    for path in sorted(missingfiles):
        if debugcopies:
            emit('debug.copies: tracing file: %s\n' % path)
        dstfctx = b[path]
        dstfctx._ancestrycontext = ancestrycontext

        if debugcopies:
            start = util.timer()
        srcfctx = _tracefile(dstfctx, ancmf, limit)
        if srcfctx:
            if debugcopies:
                emit('debug.copies: rename of: %s\n' % srcfctx._path)
            copies[path] = srcfctx.path()
        if debugcopies:
            emit('debug.copies: time: %f seconds\n'
                 % (util.timer() - start))
    return copies
249
249
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy metadata"""
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    cl = repo.changelog
    # Map each parent revision to the list of missing revisions it parents.
    childrenmap = {}
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for rev in missingrevs:
        for par in cl.parentrevs(rev):
            if par == node.nullrev:
                continue
            childrenmap.setdefault(par, []).append(rev)

    roots = set(childrenmap) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(rev, 1, {}) for rev in roots]
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        rev, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == rev:
            # We are tracing copies from both parents
            rev, i2, copies2 = heapq.heappop(work)
            copies = {}
            for dst in set(copies1) | set(copies2):
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst in copies1:
                    # If it was copied on the p1 side, mark it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies1[dst]
                else:
                    copies[dst] = copies2[dst]
        else:
            copies = copies1
        if rev == b.rev():
            _filter(a, b, copies)
            return copies
        siblings = childrenmap[rev]
        for idx, childrev in enumerate(siblings):
            childctx = repo[childrev]
            if rev == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert rev == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {dst: src
                               for dst, src in childcopies.items()
                               if match(dst)}
            # Copy the dict only if later iterations will also need it
            if idx != len(siblings) - 1:
                nextcopies = copies.copy()
            else:
                nextcopies = copies
            if childcopies:
                nextcopies = _chain(nextcopies, childcopies)
            for removed in childctx.filesremoved():
                if removed in nextcopies:
                    del nextcopies[removed]
            heapq.heappush(work, (childrev, parent, nextcopies))
    # b is a descendant of a, so the walk above must reach b.rev() and return.
    assert False
323
321
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        # both endpoints are committed changesets
        return _committedforwardcopies(a, b, match)
    # b is the working copy
    if a == b.p1():
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(b._repo, match)
    # trace to b's parent first, then layer the dirstate copies on top
    committed = _committedforwardcopies(a, b.p1(), match)
    return _chainandfilter(a, b, committed, _dirstatecopies(b._repo, match))
338
336
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} renames by reversing the forward copies b -> a

    Copies (as opposed to renames) are dropped: if the source still exists
    in 'a', the file was copied, not moved, and reporting it backwards
    would be wrong.
    """
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # sorted() makes the arbitrary 1:n pick deterministic; items() rather
    # than the Python-2-only iteritems() keeps this working on Python 3,
    # where dict.iteritems() no longer exists.
    for k, v in sorted(f.items()):
        if match and not match(v):
            continue
        # remove copies (source still exists in 'a' => copy, not rename)
        if v in a:
            continue
        r[v] = k
    return r
359
357
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: forward trace only
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: backward trace only
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    # otherwise go back to the common ancestor, then forward to y
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chainandfilter(x, y, _backwardrenames(x, anc, match=match),
                           _forwardcopies(anc, y, match=match))
382
380
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytrace = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytrace) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytrace == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
461
459
462 def _isfullcopytraceable(repo, c1, base):
460 def _isfullcopytraceable(repo, c1, base):
463 """ Checks that if base, source and destination are all no-public branches,
461 """ Checks that if base, source and destination are all no-public branches,
464 if yes let's use the full copytrace algorithm for increased capabilities
462 if yes let's use the full copytrace algorithm for increased capabilities
465 since it will be fast enough.
463 since it will be fast enough.
466
464
467 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
465 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
468 number of changesets from c1 to base such that if number of changesets are
466 number of changesets from c1 to base such that if number of changesets are
469 more than the limit, full copytracing algorithm won't be used.
467 more than the limit, full copytracing algorithm won't be used.
470 """
468 """
471 if c1.rev() is None:
469 if c1.rev() is None:
472 c1 = c1.p1()
470 c1 = c1.p1()
473 if c1.mutable() and base.mutable():
471 if c1.mutable() and base.mutable():
474 sourcecommitlimit = repo.ui.configint('experimental',
472 sourcecommitlimit = repo.ui.configint('experimental',
475 'copytrace.sourcecommitlimit')
473 'copytrace.sourcecommitlimit')
476 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
474 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
477 return commits < sourcecommitlimit
475 return commits < sourcecommitlimit
478 return False
476 return False
479
477
480 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
478 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
481 copy, renamedelete):
479 copy, renamedelete):
482 if src not in m2:
480 if src not in m2:
483 # deleted on side 2
481 # deleted on side 2
484 if src not in m1:
482 if src not in m1:
485 # renamed on side 1, deleted on side 2
483 # renamed on side 1, deleted on side 2
486 renamedelete[src] = dsts1
484 renamedelete[src] = dsts1
487 elif m2[src] != mb[src]:
485 elif m2[src] != mb[src]:
488 if not _related(c2[src], base[src]):
486 if not _related(c2[src], base[src]):
489 return
487 return
490 # modified on side 2
488 # modified on side 2
491 for dst in dsts1:
489 for dst in dsts1:
492 if dst not in m2:
490 if dst not in m2:
493 # dst not added on side 2 (handle as regular
491 # dst not added on side 2 (handle as regular
494 # "both created" case in manifestmerge otherwise)
492 # "both created" case in manifestmerge otherwise)
495 copy[dst] = src
493 copy[dst] = src
496
494
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple:
      copy         -- dict of dst -> src for copies to merge
      movewithdir  -- dict of file -> new path implied by directory renames
      diverge      -- dict of src -> list of dsts renamed divergently
      renamedelete -- dict of src -> list of dsts renamed on one side and
                      deleted on the other
      dirmove      -- dict of source dir -> destination dir (with trailing /)
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # Copies recorded from base to each merge side.
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # Invert dst->src into src->[dsts] so we can see all destinations a
    # single source was copied to on each side.
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # Flatten the per-source destination lists into sets, used only for
    # annotating the debug output below.
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    # Files added on exactly one side; candidates for directory-move tracking.
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    # Union of copies from both sides (side 2 wins on conflicting dsts).
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        # No copies at all: nothing further (directory moves need copies).
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # Normalize to trailing-slash form so prefix matching below is exact.
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                        break

    return copy, movewithdir, diverge, renamedelete, dirmove
653
651
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    element (copies to merge) is ever populated by this algorithm.
    """

    # Working contexts have rev() == None; operate on their parent commits.
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # Walk first parents from c2 back to base, collecting every file
    # touched along the way; bail out to the full algorithm on any merge.
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # Keep only recorded copies whose source still exists on side 1.
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # Index files added on side 1 by basename and by dirname, the two
        # rename patterns this heuristic recognizes.
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
754
752
755 def _related(f1, f2):
753 def _related(f1, f2):
756 """return True if f1 and f2 filectx have a common ancestor
754 """return True if f1 and f2 filectx have a common ancestor
757
755
758 Walk back to common ancestor to see if the two files originate
756 Walk back to common ancestor to see if the two files originate
759 from the same file. Since workingfilectx's rev() is None it messes
757 from the same file. Since workingfilectx's rev() is None it messes
760 up the integer comparison logic, hence the pre-step check for
758 up the integer comparison logic, hence the pre-step check for
761 None (f1 and f2 can only be workingfilectx's initially).
759 None (f1 and f2 can only be workingfilectx's initially).
762 """
760 """
763
761
764 if f1 == f2:
762 if f1 == f2:
765 return True # a match
763 return True # a match
766
764
767 g1, g2 = f1.ancestors(), f2.ancestors()
765 g1, g2 = f1.ancestors(), f2.ancestors()
768 try:
766 try:
769 f1r, f2r = f1.linkrev(), f2.linkrev()
767 f1r, f2r = f1.linkrev(), f2.linkrev()
770
768
771 if f1r is None:
769 if f1r is None:
772 f1 = next(g1)
770 f1 = next(g1)
773 if f2r is None:
771 if f2r is None:
774 f2 = next(g2)
772 f2 = next(g2)
775
773
776 while True:
774 while True:
777 f1r, f2r = f1.linkrev(), f2.linkrev()
775 f1r, f2r = f1.linkrev(), f2.linkrev()
778 if f1r > f2r:
776 if f1r > f2r:
779 f1 = next(g1)
777 f1 = next(g1)
780 elif f2r > f1r:
778 elif f2r > f1r:
781 f2 = next(g2)
779 f2 = next(g2)
782 else: # f1 and f2 point to files in the same linkrev
780 else: # f1 and f2 point to files in the same linkrev
783 return f1 == f2 # true if they point to the same file
781 return f1 == f2 # true if they point to the same file
784 except StopIteration:
782 except StopIteration:
785 return False
783 return False
786
784
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    cfgvalue = repo.ui.config('experimental', 'copytrace')
    enabled = stringutil.parsebool(cfgvalue)
    if skiprev is not None:
        if cfgvalue == 'heuristics' or enabled or enabled is None:
            # copytrace='off' skips this line, but not the entire function
            # because the line below is O(size of the repo) during a rebase,
            # while the rest of the function is much faster (and is required
            # for carrying copy metadata across the rebase anyway).
            exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst in exclude:
            continue
        # only mark files that actually exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now