##// END OF EJS Templates
copies: return only path from _tracefile() since that's all caller needs...
Martin von Zweigbergk -
r42751:4ebbd7c4 default
parent child Browse files
Show More
@@ -1,805 +1,806 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendent
31 ancestor of a or b but not both, except when a or b is a direct descendent
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendent of b or visa-versa.
107 # This only occurs when a is a descendent of b or visa-versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
110 def _chainandfilter(src, dst, a, b):
110 def _chainandfilter(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b' and filter result"""
111 """chain two sets of copies 'a' and 'b' and filter result"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126 #
126 #
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # between 5 and 6, so it includes all cases in its result.
129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
130 # Cases 1, 3, and 5 are then removed by _filter().
131
131
132 t = _chain(a, b)
132 t = _chain(a, b)
133 _filter(src, dst, t)
133 _filter(src, dst, t)
134 return t
134 return t
135
135
136 def _filter(src, dst, t):
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
139 # remove copies from files that didn't exist
140 if v not in src:
140 if v not in src:
141 del t[k]
141 del t[k]
142 # remove criss-crossed copies
142 # remove criss-crossed copies
143 elif k in src and v in dst:
143 elif k in src and v in dst:
144 del t[k]
144 del t[k]
145 # remove copies to files that were then removed
145 # remove copies to files that were then removed
146 elif k not in dst:
146 elif k not in dst:
147 del t[k]
147 del t[k]
148
148
149 def _chain(a, b):
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
150 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
151 t = a.copy()
152 for k, v in b.iteritems():
152 for k, v in b.iteritems():
153 if v in t:
153 if v in t:
154 t[k] = t[v]
154 t[k] = t[v]
155 else:
155 else:
156 t[k] = v
156 t[k] = v
157 return t
157 return t
158
158
159 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
162
162
163 for f in fctx.ancestors():
163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
164 path = f.path()
165 return f
165 if am.get(path, None) == f.filenode():
166 return path
166 if not f.isintroducedafter(limit):
167 if not f.isintroducedafter(limit):
167 return None
168 return None
168
169
169 def _dirstatecopies(repo, match=None):
170 def _dirstatecopies(repo, match=None):
170 ds = repo.dirstate
171 ds = repo.dirstate
171 c = ds.copies().copy()
172 c = ds.copies().copy()
172 for k in list(c):
173 for k in list(c):
173 if ds[k] not in 'anm' or (match and not match(k)):
174 if ds[k] not in 'anm' or (match and not match(k)):
174 del c[k]
175 del c[k]
175 return c
176 return c
176
177
177 def _computeforwardmissing(a, b, match=None):
178 def _computeforwardmissing(a, b, match=None):
178 """Computes which files are in b but not a.
179 """Computes which files are in b but not a.
179 This is its own function so extensions can easily wrap this call to see what
180 This is its own function so extensions can easily wrap this call to see what
180 files _forwardcopies is about to process.
181 files _forwardcopies is about to process.
181 """
182 """
182 ma = a.manifest()
183 ma = a.manifest()
183 mb = b.manifest()
184 mb = b.manifest()
184 return mb.filesnotin(ma, match=match)
185 return mb.filesnotin(ma, match=match)
185
186
186 def usechangesetcentricalgo(repo):
187 def usechangesetcentricalgo(repo):
187 """Checks if we should use changeset-centric copy algorithms"""
188 """Checks if we should use changeset-centric copy algorithms"""
188 return (repo.ui.config('experimental', 'copies.read-from') in
189 return (repo.ui.config('experimental', 'copies.read-from') in
189 ('changeset-only', 'compatibility'))
190 ('changeset-only', 'compatibility'))
190
191
191 def _committedforwardcopies(a, b, match):
192 def _committedforwardcopies(a, b, match):
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 # files might have to be traced back to the fctx parent of the last
194 # files might have to be traced back to the fctx parent of the last
194 # one-side-only changeset, but not further back than that
195 # one-side-only changeset, but not further back than that
195 repo = a._repo
196 repo = a._repo
196
197
197 if usechangesetcentricalgo(repo):
198 if usechangesetcentricalgo(repo):
198 return _changesetforwardcopies(a, b, match)
199 return _changesetforwardcopies(a, b, match)
199
200
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 dbg = repo.ui.debug
202 dbg = repo.ui.debug
202 if debug:
203 if debug:
203 dbg('debug.copies: looking into rename from %s to %s\n'
204 dbg('debug.copies: looking into rename from %s to %s\n'
204 % (a, b))
205 % (a, b))
205 limit = _findlimit(repo, a, b)
206 limit = _findlimit(repo, a, b)
206 if debug:
207 if debug:
207 dbg('debug.copies: search limit: %d\n' % limit)
208 dbg('debug.copies: search limit: %d\n' % limit)
208 am = a.manifest()
209 am = a.manifest()
209
210
210 # find where new files came from
211 # find where new files came from
211 # we currently don't try to find where old files went, too expensive
212 # we currently don't try to find where old files went, too expensive
212 # this means we can miss a case like 'hg rm b; hg cp a b'
213 # this means we can miss a case like 'hg rm b; hg cp a b'
213 cm = {}
214 cm = {}
214
215
215 # Computing the forward missing is quite expensive on large manifests, since
216 # Computing the forward missing is quite expensive on large manifests, since
216 # it compares the entire manifests. We can optimize it in the common use
217 # it compares the entire manifests. We can optimize it in the common use
217 # case of computing what copies are in a commit versus its parent (like
218 # case of computing what copies are in a commit versus its parent (like
218 # during a rebase or histedit). Note, we exclude merge commits from this
219 # during a rebase or histedit). Note, we exclude merge commits from this
219 # optimization, since the ctx.files() for a merge commit is not correct for
220 # optimization, since the ctx.files() for a merge commit is not correct for
220 # this comparison.
221 # this comparison.
221 forwardmissingmatch = match
222 forwardmissingmatch = match
222 if b.p1() == a and b.p2().node() == node.nullid:
223 if b.p1() == a and b.p2().node() == node.nullid:
223 filesmatcher = matchmod.exact(b.files())
224 filesmatcher = matchmod.exact(b.files())
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226
227
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228
229
229 if debug:
230 if debug:
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231
232
232 for f in sorted(missing):
233 for f in sorted(missing):
233 if debug:
234 if debug:
234 dbg('debug.copies: tracing file: %s\n' % f)
235 dbg('debug.copies: tracing file: %s\n' % f)
235 fctx = b[f]
236 fctx = b[f]
236 fctx._ancestrycontext = ancestrycontext
237 fctx._ancestrycontext = ancestrycontext
237
238
238 if debug:
239 if debug:
239 start = util.timer()
240 start = util.timer()
240 ofctx = _tracefile(fctx, am, limit)
241 opath = _tracefile(fctx, am, limit)
241 if ofctx:
242 if opath:
242 if debug:
243 if debug:
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 dbg('debug.copies: rename of: %s\n' % opath)
244 cm[f] = ofctx.path()
245 cm[f] = opath
245 if debug:
246 if debug:
246 dbg('debug.copies: time: %f seconds\n'
247 dbg('debug.copies: time: %f seconds\n'
247 % (util.timer() - start))
248 % (util.timer() - start))
248 return cm
249 return cm
249
250
250 def _changesetforwardcopies(a, b, match):
251 def _changesetforwardcopies(a, b, match):
251 if a.rev() == node.nullrev:
252 if a.rev() == node.nullrev:
252 return {}
253 return {}
253
254
254 repo = a.repo()
255 repo = a.repo()
255 children = {}
256 children = {}
256 cl = repo.changelog
257 cl = repo.changelog
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 for r in missingrevs:
259 for r in missingrevs:
259 for p in cl.parentrevs(r):
260 for p in cl.parentrevs(r):
260 if p == node.nullrev:
261 if p == node.nullrev:
261 continue
262 continue
262 if p not in children:
263 if p not in children:
263 children[p] = [r]
264 children[p] = [r]
264 else:
265 else:
265 children[p].append(r)
266 children[p].append(r)
266
267
267 roots = set(children) - set(missingrevs)
268 roots = set(children) - set(missingrevs)
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 # The parent number is only used for knowing which parent the copies dict
270 # The parent number is only used for knowing which parent the copies dict
270 # came from.
271 # came from.
271 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
272 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
272 # instance for *one* of the child nodes (the last one). Once an instance
273 # instance for *one* of the child nodes (the last one). Once an instance
273 # has been put on the queue, it is thus no longer safe to modify it.
274 # has been put on the queue, it is thus no longer safe to modify it.
274 # Conversely, it *is* safe to modify an instance popped off the queue.
275 # Conversely, it *is* safe to modify an instance popped off the queue.
275 work = [(r, 1, {}) for r in roots]
276 work = [(r, 1, {}) for r in roots]
276 heapq.heapify(work)
277 heapq.heapify(work)
277 alwaysmatch = match.always()
278 alwaysmatch = match.always()
278 while work:
279 while work:
279 r, i1, copies = heapq.heappop(work)
280 r, i1, copies = heapq.heappop(work)
280 if work and work[0][0] == r:
281 if work and work[0][0] == r:
281 # We are tracing copies from both parents
282 # We are tracing copies from both parents
282 r, i2, copies2 = heapq.heappop(work)
283 r, i2, copies2 = heapq.heappop(work)
283 for dst, src in copies2.items():
284 for dst, src in copies2.items():
284 # Unlike when copies are stored in the filelog, we consider
285 # Unlike when copies are stored in the filelog, we consider
285 # it a copy even if the destination already existed on the
286 # it a copy even if the destination already existed on the
286 # other branch. It's simply too expensive to check if the
287 # other branch. It's simply too expensive to check if the
287 # file existed in the manifest.
288 # file existed in the manifest.
288 if dst not in copies:
289 if dst not in copies:
289 # If it was copied on the p1 side, leave it as copied from
290 # If it was copied on the p1 side, leave it as copied from
290 # that side, even if it was also copied on the p2 side.
291 # that side, even if it was also copied on the p2 side.
291 copies[dst] = copies2[dst]
292 copies[dst] = copies2[dst]
292 if r == b.rev():
293 if r == b.rev():
293 _filter(a, b, copies)
294 _filter(a, b, copies)
294 return copies
295 return copies
295 for i, c in enumerate(children[r]):
296 for i, c in enumerate(children[r]):
296 childctx = repo[c]
297 childctx = repo[c]
297 if r == childctx.p1().rev():
298 if r == childctx.p1().rev():
298 parent = 1
299 parent = 1
299 childcopies = childctx.p1copies()
300 childcopies = childctx.p1copies()
300 else:
301 else:
301 assert r == childctx.p2().rev()
302 assert r == childctx.p2().rev()
302 parent = 2
303 parent = 2
303 childcopies = childctx.p2copies()
304 childcopies = childctx.p2copies()
304 if not alwaysmatch:
305 if not alwaysmatch:
305 childcopies = {dst: src for dst, src in childcopies.items()
306 childcopies = {dst: src for dst, src in childcopies.items()
306 if match(dst)}
307 if match(dst)}
307 # Copy the dict only if later iterations will also need it
308 # Copy the dict only if later iterations will also need it
308 if i != len(children[r]) - 1:
309 if i != len(children[r]) - 1:
309 newcopies = copies.copy()
310 newcopies = copies.copy()
310 else:
311 else:
311 newcopies = copies
312 newcopies = copies
312 if childcopies:
313 if childcopies:
313 newcopies = _chain(newcopies, childcopies)
314 newcopies = _chain(newcopies, childcopies)
314 for f in childctx.filesremoved():
315 for f in childctx.filesremoved():
315 if f in newcopies:
316 if f in newcopies:
316 del newcopies[f]
317 del newcopies[f]
317 heapq.heappush(work, (c, parent, newcopies))
318 heapq.heappush(work, (c, parent, newcopies))
318 assert False
319 assert False
319
320
320 def _forwardcopies(a, b, match=None):
321 def _forwardcopies(a, b, match=None):
321 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
322 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
322
323
323 match = a.repo().narrowmatch(match)
324 match = a.repo().narrowmatch(match)
324 # check for working copy
325 # check for working copy
325 if b.rev() is None:
326 if b.rev() is None:
326 if a == b.p1():
327 if a == b.p1():
327 # short-circuit to avoid issues with merge states
328 # short-circuit to avoid issues with merge states
328 return _dirstatecopies(b._repo, match)
329 return _dirstatecopies(b._repo, match)
329
330
330 cm = _committedforwardcopies(a, b.p1(), match)
331 cm = _committedforwardcopies(a, b.p1(), match)
331 # combine copies from dirstate if necessary
332 # combine copies from dirstate if necessary
332 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
333 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
333 return _committedforwardcopies(a, b, match)
334 return _committedforwardcopies(a, b, match)
334
335
335 def _backwardrenames(a, b, match):
336 def _backwardrenames(a, b, match):
336 if a._repo.ui.config('experimental', 'copytrace') == 'off':
337 if a._repo.ui.config('experimental', 'copytrace') == 'off':
337 return {}
338 return {}
338
339
339 # Even though we're not taking copies into account, 1:n rename situations
340 # Even though we're not taking copies into account, 1:n rename situations
340 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
341 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
341 # arbitrarily pick one of the renames.
342 # arbitrarily pick one of the renames.
342 # We don't want to pass in "match" here, since that would filter
343 # We don't want to pass in "match" here, since that would filter
343 # the destination by it. Since we're reversing the copies, we want
344 # the destination by it. Since we're reversing the copies, we want
344 # to filter the source instead.
345 # to filter the source instead.
345 f = _forwardcopies(b, a)
346 f = _forwardcopies(b, a)
346 r = {}
347 r = {}
347 for k, v in sorted(f.iteritems()):
348 for k, v in sorted(f.iteritems()):
348 if match and not match(v):
349 if match and not match(v):
349 continue
350 continue
350 # remove copies
351 # remove copies
351 if v in a:
352 if v in a:
352 continue
353 continue
353 r[v] = k
354 r[v] = k
354 return r
355 return r
355
356
356 def pathcopies(x, y, match=None):
357 def pathcopies(x, y, match=None):
357 """find {dst@y: src@x} copy mapping for directed compare"""
358 """find {dst@y: src@x} copy mapping for directed compare"""
358 repo = x._repo
359 repo = x._repo
359 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
360 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
360 if debug:
361 if debug:
361 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
362 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
362 % (x, y))
363 % (x, y))
363 if x == y or not x or not y:
364 if x == y or not x or not y:
364 return {}
365 return {}
365 a = y.ancestor(x)
366 a = y.ancestor(x)
366 if a == x:
367 if a == x:
367 if debug:
368 if debug:
368 repo.ui.debug('debug.copies: search mode: forward\n')
369 repo.ui.debug('debug.copies: search mode: forward\n')
369 return _forwardcopies(x, y, match=match)
370 return _forwardcopies(x, y, match=match)
370 if a == y:
371 if a == y:
371 if debug:
372 if debug:
372 repo.ui.debug('debug.copies: search mode: backward\n')
373 repo.ui.debug('debug.copies: search mode: backward\n')
373 return _backwardrenames(x, y, match=match)
374 return _backwardrenames(x, y, match=match)
374 if debug:
375 if debug:
375 repo.ui.debug('debug.copies: search mode: combined\n')
376 repo.ui.debug('debug.copies: search mode: combined\n')
376 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
377 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
377 _forwardcopies(a, y, match=match))
378 _forwardcopies(a, y, match=match))
378
379
379 def mergecopies(repo, c1, c2, base):
380 def mergecopies(repo, c1, c2, base):
380 """
381 """
381 Finds moves and copies between context c1 and c2 that are relevant for
382 Finds moves and copies between context c1 and c2 that are relevant for
382 merging. 'base' will be used as the merge base.
383 merging. 'base' will be used as the merge base.
383
384
384 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
385 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
385 files that were moved/ copied in one merge parent and modified in another.
386 files that were moved/ copied in one merge parent and modified in another.
386 For example:
387 For example:
387
388
388 o ---> 4 another commit
389 o ---> 4 another commit
389 |
390 |
390 | o ---> 3 commit that modifies a.txt
391 | o ---> 3 commit that modifies a.txt
391 | /
392 | /
392 o / ---> 2 commit that moves a.txt to b.txt
393 o / ---> 2 commit that moves a.txt to b.txt
393 |/
394 |/
394 o ---> 1 merge base
395 o ---> 1 merge base
395
396
396 If we try to rebase revision 3 on revision 4, since there is no a.txt in
397 If we try to rebase revision 3 on revision 4, since there is no a.txt in
397 revision 4, and if user have copytrace disabled, we prints the following
398 revision 4, and if user have copytrace disabled, we prints the following
398 message:
399 message:
399
400
400 ```other changed <file> which local deleted```
401 ```other changed <file> which local deleted```
401
402
402 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
403 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
403 "dirmove".
404 "dirmove".
404
405
405 "copy" is a mapping from destination name -> source name,
406 "copy" is a mapping from destination name -> source name,
406 where source is in c1 and destination is in c2 or vice-versa.
407 where source is in c1 and destination is in c2 or vice-versa.
407
408
408 "movewithdir" is a mapping from source name -> destination name,
409 "movewithdir" is a mapping from source name -> destination name,
409 where the file at source present in one context but not the other
410 where the file at source present in one context but not the other
410 needs to be moved to destination by the merge process, because the
411 needs to be moved to destination by the merge process, because the
411 other context moved the directory it is in.
412 other context moved the directory it is in.
412
413
413 "diverge" is a mapping of source name -> list of destination names
414 "diverge" is a mapping of source name -> list of destination names
414 for divergent renames.
415 for divergent renames.
415
416
416 "renamedelete" is a mapping of source name -> list of destination
417 "renamedelete" is a mapping of source name -> list of destination
417 names for files deleted in c1 that were renamed in c2 or vice-versa.
418 names for files deleted in c1 that were renamed in c2 or vice-versa.
418
419
419 "dirmove" is a mapping of detected source dir -> destination dir renames.
420 "dirmove" is a mapping of detected source dir -> destination dir renames.
420 This is needed for handling changes to new files previously grafted into
421 This is needed for handling changes to new files previously grafted into
421 renamed directories.
422 renamed directories.
422
423
423 This function calls different copytracing algorithms based on config.
424 This function calls different copytracing algorithms based on config.
424 """
425 """
425 # avoid silly behavior for update from empty dir
426 # avoid silly behavior for update from empty dir
426 if not c1 or not c2 or c1 == c2:
427 if not c1 or not c2 or c1 == c2:
427 return {}, {}, {}, {}, {}
428 return {}, {}, {}, {}, {}
428
429
429 narrowmatch = c1.repo().narrowmatch()
430 narrowmatch = c1.repo().narrowmatch()
430
431
431 # avoid silly behavior for parent -> working dir
432 # avoid silly behavior for parent -> working dir
432 if c2.node() is None and c1.node() == repo.dirstate.p1():
433 if c2.node() is None and c1.node() == repo.dirstate.p1():
433 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
434 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
434
435
435 copytracing = repo.ui.config('experimental', 'copytrace')
436 copytracing = repo.ui.config('experimental', 'copytrace')
436 if stringutil.parsebool(copytracing) is False:
437 if stringutil.parsebool(copytracing) is False:
437 # stringutil.parsebool() returns None when it is unable to parse the
438 # stringutil.parsebool() returns None when it is unable to parse the
438 # value, so we should rely on making sure copytracing is on such cases
439 # value, so we should rely on making sure copytracing is on such cases
439 return {}, {}, {}, {}, {}
440 return {}, {}, {}, {}, {}
440
441
441 if usechangesetcentricalgo(repo):
442 if usechangesetcentricalgo(repo):
442 # The heuristics don't make sense when we need changeset-centric algos
443 # The heuristics don't make sense when we need changeset-centric algos
443 return _fullcopytracing(repo, c1, c2, base)
444 return _fullcopytracing(repo, c1, c2, base)
444
445
445 # Copy trace disabling is explicitly below the node == p1 logic above
446 # Copy trace disabling is explicitly below the node == p1 logic above
446 # because the logic above is required for a simple copy to be kept across a
447 # because the logic above is required for a simple copy to be kept across a
447 # rebase.
448 # rebase.
448 if copytracing == 'heuristics':
449 if copytracing == 'heuristics':
449 # Do full copytracing if only non-public revisions are involved as
450 # Do full copytracing if only non-public revisions are involved as
450 # that will be fast enough and will also cover the copies which could
451 # that will be fast enough and will also cover the copies which could
451 # be missed by heuristics
452 # be missed by heuristics
452 if _isfullcopytraceable(repo, c1, base):
453 if _isfullcopytraceable(repo, c1, base):
453 return _fullcopytracing(repo, c1, c2, base)
454 return _fullcopytracing(repo, c1, c2, base)
454 return _heuristicscopytracing(repo, c1, c2, base)
455 return _heuristicscopytracing(repo, c1, c2, base)
455 else:
456 else:
456 return _fullcopytracing(repo, c1, c2, base)
457 return _fullcopytracing(repo, c1, c2, base)
457
458
458 def _isfullcopytraceable(repo, c1, base):
459 def _isfullcopytraceable(repo, c1, base):
459 """ Checks that if base, source and destination are all no-public branches,
460 """ Checks that if base, source and destination are all no-public branches,
460 if yes let's use the full copytrace algorithm for increased capabilities
461 if yes let's use the full copytrace algorithm for increased capabilities
461 since it will be fast enough.
462 since it will be fast enough.
462
463
463 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
464 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
464 number of changesets from c1 to base such that if number of changesets are
465 number of changesets from c1 to base such that if number of changesets are
465 more than the limit, full copytracing algorithm won't be used.
466 more than the limit, full copytracing algorithm won't be used.
466 """
467 """
467 if c1.rev() is None:
468 if c1.rev() is None:
468 c1 = c1.p1()
469 c1 = c1.p1()
469 if c1.mutable() and base.mutable():
470 if c1.mutable() and base.mutable():
470 sourcecommitlimit = repo.ui.configint('experimental',
471 sourcecommitlimit = repo.ui.configint('experimental',
471 'copytrace.sourcecommitlimit')
472 'copytrace.sourcecommitlimit')
472 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
473 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
473 return commits < sourcecommitlimit
474 return commits < sourcecommitlimit
474 return False
475 return False
475
476
476 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
477 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
477 copy, renamedelete):
478 copy, renamedelete):
478 if src not in m2:
479 if src not in m2:
479 # deleted on side 2
480 # deleted on side 2
480 if src not in m1:
481 if src not in m1:
481 # renamed on side 1, deleted on side 2
482 # renamed on side 1, deleted on side 2
482 renamedelete[src] = dsts1
483 renamedelete[src] = dsts1
483 elif m2[src] != mb[src]:
484 elif m2[src] != mb[src]:
484 if not _related(c2[src], base[src]):
485 if not _related(c2[src], base[src]):
485 return
486 return
486 # modified on side 2
487 # modified on side 2
487 for dst in dsts1:
488 for dst in dsts1:
488 if dst not in m2:
489 if dst not in m2:
489 # dst not added on side 2 (handle as regular
490 # dst not added on side 2 (handle as regular
490 # "both created" case in manifestmerge otherwise)
491 # "both created" case in manifestmerge otherwise)
491 copy[dst] = src
492 copy[dst] = src
492
493
493 def _fullcopytracing(repo, c1, c2, base):
494 def _fullcopytracing(repo, c1, c2, base):
494 """ The full copytracing algorithm which finds all the new files that were
495 """ The full copytracing algorithm which finds all the new files that were
495 added from merge base up to the top commit and for each file it checks if
496 added from merge base up to the top commit and for each file it checks if
496 this file was copied from another file.
497 this file was copied from another file.
497
498
498 This is pretty slow when a lot of changesets are involved but will track all
499 This is pretty slow when a lot of changesets are involved but will track all
499 the copies.
500 the copies.
500 """
501 """
501 m1 = c1.manifest()
502 m1 = c1.manifest()
502 m2 = c2.manifest()
503 m2 = c2.manifest()
503 mb = base.manifest()
504 mb = base.manifest()
504
505
505 copies1 = pathcopies(base, c1)
506 copies1 = pathcopies(base, c1)
506 copies2 = pathcopies(base, c2)
507 copies2 = pathcopies(base, c2)
507
508
508 inversecopies1 = {}
509 inversecopies1 = {}
509 inversecopies2 = {}
510 inversecopies2 = {}
510 for dst, src in copies1.items():
511 for dst, src in copies1.items():
511 inversecopies1.setdefault(src, []).append(dst)
512 inversecopies1.setdefault(src, []).append(dst)
512 for dst, src in copies2.items():
513 for dst, src in copies2.items():
513 inversecopies2.setdefault(src, []).append(dst)
514 inversecopies2.setdefault(src, []).append(dst)
514
515
515 copy = {}
516 copy = {}
516 diverge = {}
517 diverge = {}
517 renamedelete = {}
518 renamedelete = {}
518 allsources = set(inversecopies1) | set(inversecopies2)
519 allsources = set(inversecopies1) | set(inversecopies2)
519 for src in allsources:
520 for src in allsources:
520 dsts1 = inversecopies1.get(src)
521 dsts1 = inversecopies1.get(src)
521 dsts2 = inversecopies2.get(src)
522 dsts2 = inversecopies2.get(src)
522 if dsts1 and dsts2:
523 if dsts1 and dsts2:
523 # copied/renamed on both sides
524 # copied/renamed on both sides
524 if src not in m1 and src not in m2:
525 if src not in m1 and src not in m2:
525 # renamed on both sides
526 # renamed on both sides
526 dsts1 = set(dsts1)
527 dsts1 = set(dsts1)
527 dsts2 = set(dsts2)
528 dsts2 = set(dsts2)
528 # If there's some overlap in the rename destinations, we
529 # If there's some overlap in the rename destinations, we
529 # consider it not divergent. For example, if side 1 copies 'a'
530 # consider it not divergent. For example, if side 1 copies 'a'
530 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
531 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
531 # and 'd' and deletes 'a'.
532 # and 'd' and deletes 'a'.
532 if dsts1 & dsts2:
533 if dsts1 & dsts2:
533 for dst in (dsts1 & dsts2):
534 for dst in (dsts1 & dsts2):
534 copy[dst] = src
535 copy[dst] = src
535 else:
536 else:
536 diverge[src] = sorted(dsts1 | dsts2)
537 diverge[src] = sorted(dsts1 | dsts2)
537 elif src in m1 and src in m2:
538 elif src in m1 and src in m2:
538 # copied on both sides
539 # copied on both sides
539 dsts1 = set(dsts1)
540 dsts1 = set(dsts1)
540 dsts2 = set(dsts2)
541 dsts2 = set(dsts2)
541 for dst in (dsts1 & dsts2):
542 for dst in (dsts1 & dsts2):
542 copy[dst] = src
543 copy[dst] = src
543 # TODO: Handle cases where it was renamed on one side and copied
544 # TODO: Handle cases where it was renamed on one side and copied
544 # on the other side
545 # on the other side
545 elif dsts1:
546 elif dsts1:
546 # copied/renamed only on side 1
547 # copied/renamed only on side 1
547 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
548 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
548 copy, renamedelete)
549 copy, renamedelete)
549 elif dsts2:
550 elif dsts2:
550 # copied/renamed only on side 2
551 # copied/renamed only on side 2
551 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
552 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
552 copy, renamedelete)
553 copy, renamedelete)
553
554
554 renamedeleteset = set()
555 renamedeleteset = set()
555 divergeset = set()
556 divergeset = set()
556 for dsts in diverge.values():
557 for dsts in diverge.values():
557 divergeset.update(dsts)
558 divergeset.update(dsts)
558 for dsts in renamedelete.values():
559 for dsts in renamedelete.values():
559 renamedeleteset.update(dsts)
560 renamedeleteset.update(dsts)
560
561
561 # find interesting file sets from manifests
562 # find interesting file sets from manifests
562 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
563 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
563 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
564 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
564 u1 = sorted(addedinm1 - addedinm2)
565 u1 = sorted(addedinm1 - addedinm2)
565 u2 = sorted(addedinm2 - addedinm1)
566 u2 = sorted(addedinm2 - addedinm1)
566
567
567 header = " unmatched files in %s"
568 header = " unmatched files in %s"
568 if u1:
569 if u1:
569 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
570 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
570 if u2:
571 if u2:
571 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
572 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
572
573
573 fullcopy = copies1.copy()
574 fullcopy = copies1.copy()
574 fullcopy.update(copies2)
575 fullcopy.update(copies2)
575 if not fullcopy:
576 if not fullcopy:
576 return copy, {}, diverge, renamedelete, {}
577 return copy, {}, diverge, renamedelete, {}
577
578
578 if repo.ui.debugflag:
579 if repo.ui.debugflag:
579 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
580 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
580 "% = renamed and deleted):\n")
581 "% = renamed and deleted):\n")
581 for f in sorted(fullcopy):
582 for f in sorted(fullcopy):
582 note = ""
583 note = ""
583 if f in copy:
584 if f in copy:
584 note += "*"
585 note += "*"
585 if f in divergeset:
586 if f in divergeset:
586 note += "!"
587 note += "!"
587 if f in renamedeleteset:
588 if f in renamedeleteset:
588 note += "%"
589 note += "%"
589 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
590 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
590 note))
591 note))
591 del divergeset
592 del divergeset
592
593
593 repo.ui.debug(" checking for directory renames\n")
594 repo.ui.debug(" checking for directory renames\n")
594
595
595 # generate a directory move map
596 # generate a directory move map
596 d1, d2 = c1.dirs(), c2.dirs()
597 d1, d2 = c1.dirs(), c2.dirs()
597 invalid = set()
598 invalid = set()
598 dirmove = {}
599 dirmove = {}
599
600
600 # examine each file copy for a potential directory move, which is
601 # examine each file copy for a potential directory move, which is
601 # when all the files in a directory are moved to a new directory
602 # when all the files in a directory are moved to a new directory
602 for dst, src in fullcopy.iteritems():
603 for dst, src in fullcopy.iteritems():
603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
604 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
604 if dsrc in invalid:
605 if dsrc in invalid:
605 # already seen to be uninteresting
606 # already seen to be uninteresting
606 continue
607 continue
607 elif dsrc in d1 and ddst in d1:
608 elif dsrc in d1 and ddst in d1:
608 # directory wasn't entirely moved locally
609 # directory wasn't entirely moved locally
609 invalid.add(dsrc)
610 invalid.add(dsrc)
610 elif dsrc in d2 and ddst in d2:
611 elif dsrc in d2 and ddst in d2:
611 # directory wasn't entirely moved remotely
612 # directory wasn't entirely moved remotely
612 invalid.add(dsrc)
613 invalid.add(dsrc)
613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
614 elif dsrc in dirmove and dirmove[dsrc] != ddst:
614 # files from the same directory moved to two different places
615 # files from the same directory moved to two different places
615 invalid.add(dsrc)
616 invalid.add(dsrc)
616 else:
617 else:
617 # looks good so far
618 # looks good so far
618 dirmove[dsrc] = ddst
619 dirmove[dsrc] = ddst
619
620
620 for i in invalid:
621 for i in invalid:
621 if i in dirmove:
622 if i in dirmove:
622 del dirmove[i]
623 del dirmove[i]
623 del d1, d2, invalid
624 del d1, d2, invalid
624
625
625 if not dirmove:
626 if not dirmove:
626 return copy, {}, diverge, renamedelete, {}
627 return copy, {}, diverge, renamedelete, {}
627
628
628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
629 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
629
630
630 for d in dirmove:
631 for d in dirmove:
631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
632 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
632 (d, dirmove[d]))
633 (d, dirmove[d]))
633
634
634 movewithdir = {}
635 movewithdir = {}
635 # check unaccounted nonoverlapping files against directory moves
636 # check unaccounted nonoverlapping files against directory moves
636 for f in u1 + u2:
637 for f in u1 + u2:
637 if f not in fullcopy:
638 if f not in fullcopy:
638 for d in dirmove:
639 for d in dirmove:
639 if f.startswith(d):
640 if f.startswith(d):
640 # new file added in a directory that was moved, move it
641 # new file added in a directory that was moved, move it
641 df = dirmove[d] + f[len(d):]
642 df = dirmove[d] + f[len(d):]
642 if df not in copy:
643 if df not in copy:
643 movewithdir[f] = df
644 movewithdir[f] = df
644 repo.ui.debug((" pending file src: '%s' -> "
645 repo.ui.debug((" pending file src: '%s' -> "
645 "dst: '%s'\n") % (f, df))
646 "dst: '%s'\n") % (f, df))
646 break
647 break
647
648
648 return copy, movewithdir, diverge, renamedelete, dirmove
649 return copy, movewithdir, diverge, renamedelete, dirmove
649
650
650 def _heuristicscopytracing(repo, c1, c2, base):
651 def _heuristicscopytracing(repo, c1, c2, base):
651 """ Fast copytracing using filename heuristics
652 """ Fast copytracing using filename heuristics
652
653
653 Assumes that moves or renames are of following two types:
654 Assumes that moves or renames are of following two types:
654
655
655 1) Inside a directory only (same directory name but different filenames)
656 1) Inside a directory only (same directory name but different filenames)
656 2) Move from one directory to another
657 2) Move from one directory to another
657 (same filenames but different directory names)
658 (same filenames but different directory names)
658
659
659 Works only when there are no merge commits in the "source branch".
660 Works only when there are no merge commits in the "source branch".
660 Source branch is commits from base up to c2 not including base.
661 Source branch is commits from base up to c2 not including base.
661
662
662 If merge is involved it fallbacks to _fullcopytracing().
663 If merge is involved it fallbacks to _fullcopytracing().
663
664
664 Can be used by setting the following config:
665 Can be used by setting the following config:
665
666
666 [experimental]
667 [experimental]
667 copytrace = heuristics
668 copytrace = heuristics
668
669
669 In some cases the copy/move candidates found by heuristics can be very large
670 In some cases the copy/move candidates found by heuristics can be very large
670 in number and that will make the algorithm slow. The number of possible
671 in number and that will make the algorithm slow. The number of possible
671 candidates to check can be limited by using the config
672 candidates to check can be limited by using the config
672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
673 `experimental.copytrace.movecandidateslimit` which defaults to 100.
673 """
674 """
674
675
675 if c1.rev() is None:
676 if c1.rev() is None:
676 c1 = c1.p1()
677 c1 = c1.p1()
677 if c2.rev() is None:
678 if c2.rev() is None:
678 c2 = c2.p1()
679 c2 = c2.p1()
679
680
680 copies = {}
681 copies = {}
681
682
682 changedfiles = set()
683 changedfiles = set()
683 m1 = c1.manifest()
684 m1 = c1.manifest()
684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
685 if not repo.revs('%d::%d', base.rev(), c2.rev()):
685 # If base is not in c2 branch, we switch to fullcopytracing
686 # If base is not in c2 branch, we switch to fullcopytracing
686 repo.ui.debug("switching to full copytracing as base is not "
687 repo.ui.debug("switching to full copytracing as base is not "
687 "an ancestor of c2\n")
688 "an ancestor of c2\n")
688 return _fullcopytracing(repo, c1, c2, base)
689 return _fullcopytracing(repo, c1, c2, base)
689
690
690 ctx = c2
691 ctx = c2
691 while ctx != base:
692 while ctx != base:
692 if len(ctx.parents()) == 2:
693 if len(ctx.parents()) == 2:
693 # To keep things simple let's not handle merges
694 # To keep things simple let's not handle merges
694 repo.ui.debug("switching to full copytracing because of merges\n")
695 repo.ui.debug("switching to full copytracing because of merges\n")
695 return _fullcopytracing(repo, c1, c2, base)
696 return _fullcopytracing(repo, c1, c2, base)
696 changedfiles.update(ctx.files())
697 changedfiles.update(ctx.files())
697 ctx = ctx.p1()
698 ctx = ctx.p1()
698
699
699 cp = _forwardcopies(base, c2)
700 cp = _forwardcopies(base, c2)
700 for dst, src in cp.iteritems():
701 for dst, src in cp.iteritems():
701 if src in m1:
702 if src in m1:
702 copies[dst] = src
703 copies[dst] = src
703
704
704 # file is missing if it isn't present in the destination, but is present in
705 # file is missing if it isn't present in the destination, but is present in
705 # the base and present in the source.
706 # the base and present in the source.
706 # Presence in the base is important to exclude added files, presence in the
707 # Presence in the base is important to exclude added files, presence in the
707 # source is important to exclude removed files.
708 # source is important to exclude removed files.
708 filt = lambda f: f not in m1 and f in base and f in c2
709 filt = lambda f: f not in m1 and f in base and f in c2
709 missingfiles = [f for f in changedfiles if filt(f)]
710 missingfiles = [f for f in changedfiles if filt(f)]
710
711
711 if missingfiles:
712 if missingfiles:
712 basenametofilename = collections.defaultdict(list)
713 basenametofilename = collections.defaultdict(list)
713 dirnametofilename = collections.defaultdict(list)
714 dirnametofilename = collections.defaultdict(list)
714
715
715 for f in m1.filesnotin(base.manifest()):
716 for f in m1.filesnotin(base.manifest()):
716 basename = os.path.basename(f)
717 basename = os.path.basename(f)
717 dirname = os.path.dirname(f)
718 dirname = os.path.dirname(f)
718 basenametofilename[basename].append(f)
719 basenametofilename[basename].append(f)
719 dirnametofilename[dirname].append(f)
720 dirnametofilename[dirname].append(f)
720
721
721 for f in missingfiles:
722 for f in missingfiles:
722 basename = os.path.basename(f)
723 basename = os.path.basename(f)
723 dirname = os.path.dirname(f)
724 dirname = os.path.dirname(f)
724 samebasename = basenametofilename[basename]
725 samebasename = basenametofilename[basename]
725 samedirname = dirnametofilename[dirname]
726 samedirname = dirnametofilename[dirname]
726 movecandidates = samebasename + samedirname
727 movecandidates = samebasename + samedirname
727 # f is guaranteed to be present in c2, that's why
728 # f is guaranteed to be present in c2, that's why
728 # c2.filectx(f) won't fail
729 # c2.filectx(f) won't fail
729 f2 = c2.filectx(f)
730 f2 = c2.filectx(f)
730 # we can have a lot of candidates which can slow down the heuristics
731 # we can have a lot of candidates which can slow down the heuristics
731 # config value to limit the number of candidates moves to check
732 # config value to limit the number of candidates moves to check
732 maxcandidates = repo.ui.configint('experimental',
733 maxcandidates = repo.ui.configint('experimental',
733 'copytrace.movecandidateslimit')
734 'copytrace.movecandidateslimit')
734
735
735 if len(movecandidates) > maxcandidates:
736 if len(movecandidates) > maxcandidates:
736 repo.ui.status(_("skipping copytracing for '%s', more "
737 repo.ui.status(_("skipping copytracing for '%s', more "
737 "candidates than the limit: %d\n")
738 "candidates than the limit: %d\n")
738 % (f, len(movecandidates)))
739 % (f, len(movecandidates)))
739 continue
740 continue
740
741
741 for candidate in movecandidates:
742 for candidate in movecandidates:
742 f1 = c1.filectx(candidate)
743 f1 = c1.filectx(candidate)
743 if _related(f1, f2):
744 if _related(f1, f2):
744 # if there are a few related copies then we'll merge
745 # if there are a few related copies then we'll merge
745 # changes into all of them. This matches the behaviour
746 # changes into all of them. This matches the behaviour
746 # of upstream copytracing
747 # of upstream copytracing
747 copies[candidate] = f
748 copies[candidate] = f
748
749
749 return copies, {}, {}, {}, {}
750 return copies, {}, {}, {}, {}
750
751
751 def _related(f1, f2):
752 def _related(f1, f2):
752 """return True if f1 and f2 filectx have a common ancestor
753 """return True if f1 and f2 filectx have a common ancestor
753
754
754 Walk back to common ancestor to see if the two files originate
755 Walk back to common ancestor to see if the two files originate
755 from the same file. Since workingfilectx's rev() is None it messes
756 from the same file. Since workingfilectx's rev() is None it messes
756 up the integer comparison logic, hence the pre-step check for
757 up the integer comparison logic, hence the pre-step check for
757 None (f1 and f2 can only be workingfilectx's initially).
758 None (f1 and f2 can only be workingfilectx's initially).
758 """
759 """
759
760
760 if f1 == f2:
761 if f1 == f2:
761 return True # a match
762 return True # a match
762
763
763 g1, g2 = f1.ancestors(), f2.ancestors()
764 g1, g2 = f1.ancestors(), f2.ancestors()
764 try:
765 try:
765 f1r, f2r = f1.linkrev(), f2.linkrev()
766 f1r, f2r = f1.linkrev(), f2.linkrev()
766
767
767 if f1r is None:
768 if f1r is None:
768 f1 = next(g1)
769 f1 = next(g1)
769 if f2r is None:
770 if f2r is None:
770 f2 = next(g2)
771 f2 = next(g2)
771
772
772 while True:
773 while True:
773 f1r, f2r = f1.linkrev(), f2.linkrev()
774 f1r, f2r = f1.linkrev(), f2.linkrev()
774 if f1r > f2r:
775 if f1r > f2r:
775 f1 = next(g1)
776 f1 = next(g1)
776 elif f2r > f1r:
777 elif f2r > f1r:
777 f2 = next(g2)
778 f2 = next(g2)
778 else: # f1 and f2 point to files in the same linkrev
779 else: # f1 and f2 point to files in the same linkrev
779 return f1 == f2 # true if they point to the same file
780 return f1 == f2 # true if they point to the same file
780 except StopIteration:
781 except StopIteration:
781 return False
782 return False
782
783
783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
784 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
784 """reproduce copies from fromrev to rev in the dirstate
785 """reproduce copies from fromrev to rev in the dirstate
785
786
786 If skiprev is specified, it's a revision that should be used to
787 If skiprev is specified, it's a revision that should be used to
787 filter copy records. Any copies that occur between fromrev and
788 filter copy records. Any copies that occur between fromrev and
788 skiprev will not be duplicated, even if they appear in the set of
789 skiprev will not be duplicated, even if they appear in the set of
789 copies between fromrev and rev.
790 copies between fromrev and rev.
790 """
791 """
791 exclude = {}
792 exclude = {}
792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
793 ctraceconfig = repo.ui.config('experimental', 'copytrace')
793 bctrace = stringutil.parsebool(ctraceconfig)
794 bctrace = stringutil.parsebool(ctraceconfig)
794 if (skiprev is not None and
795 if (skiprev is not None and
795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
796 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
796 # copytrace='off' skips this line, but not the entire function because
797 # copytrace='off' skips this line, but not the entire function because
797 # the line below is O(size of the repo) during a rebase, while the rest
798 # the line below is O(size of the repo) during a rebase, while the rest
798 # of the function is much faster (and is required for carrying copy
799 # of the function is much faster (and is required for carrying copy
799 # metadata across the rebase anyway).
800 # metadata across the rebase anyway).
800 exclude = pathcopies(repo[fromrev], repo[skiprev])
801 exclude = pathcopies(repo[fromrev], repo[skiprev])
801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
802 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
802 if dst in exclude:
803 if dst in exclude:
803 continue
804 continue
804 if dst in wctx:
805 if dst in wctx:
805 wctx[dst].markcopied(src)
806 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now