##// END OF EJS Templates
copies: simplify merging of copy dicts on merge commits...
Martin von Zweigbergk -
r42719:907cef39 default
parent child Browse files
Show More
@@ -1,807 +1,801 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
def _findlimit(repo, ctxa, ctxb):
    """Return the last revision that needs to be checked so that a full
    transitive closure for file copies between ctxa and ctxb can be
    properly calculated.

    Generally this is the earliest revision number that is an ancestor of
    a or b but not of both; when one of a/b is a direct descendant of the
    other, the minimum revnum of a and b is returned instead.
    """
    # Strategy:
    # - tag a and b with opposite sides (-1 / 1)
    # - a parent whose children are all on one side is on that side,
    #   otherwise it is on no side (0)
    # - walk the graph from highest to lowest rev using a heap of negated
    #   revnums:
    #   - unseen parents are added to the side map
    #   - a parent reached from children on different sides has its side
    #     cleared
    #   - track how many "interesting" (one-sided) revs might remain
    #   - track the lowest interesting rev seen
    #   - stop once no interesting revs are left

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # working directory: use its parents and the sentinel wdirrev
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    sideof = {a: -1, b: 1}
    heap = [-a, -b]
    heapq.heapify(heap)
    interesting = len(heap)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(heap)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in sideof:
                # first time we see p; add it to the heap
                sideof[p] = sideof[r]
                if sideof[p]:
                    interesting += 1
                heapq.heappush(heap, -p)
            elif sideof[p] and sideof[p] != sideof[r]:
                # p was interesting but is reachable from both sides
                sideof[p] = 0
                interesting -= 1
        if sideof[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendant of b or vice-versa.
    return min(limit, a, b)
109
109
def _chainandfilter(src, dst, a, b):
    """chain two sets of copies 'a' and 'b' and filter the result"""
    # Chaining copies in 'a' (from 'src' via some other commit 'mid') with
    # copies in 'b' (from 'mid' to 'dst') yields the following non-trivial
    # combinations. For example, case 2 is where a file existed in 'src',
    # kept its name in 'mid', and was renamed between 'mid' and 'dst'.
    #
    # case  src  mid  dst  result
    #  1     x    y    -     -
    #  2     x    y    y    x->y
    #  3     x    y    x     -
    #  4     x    y    z    x->z
    #  5     -    x    y     -
    #  6     x    x    y    x->y
    #
    # _chain() chains the copies in 'a' and 'b' but cannot tell apart
    # cases 1/2, 3/4, or 5/6, so its result includes all of them.
    # _filter() then removes the invalid cases 1, 3 and 5.
    chained = _chain(a, b)
    _filter(src, dst, chained)
    return chained
135
135
136 def _filter(src, dst, t):
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
139 # remove copies from files that didn't exist
140 if v not in src:
140 if v not in src:
141 del t[k]
141 del t[k]
142 # remove criss-crossed copies
142 # remove criss-crossed copies
143 elif k in src and v in dst:
143 elif k in src and v in dst:
144 del t[k]
144 del t[k]
145 # remove copies to files that were then removed
145 # remove copies to files that were then removed
146 elif k not in dst:
146 elif k not in dst:
147 del t[k]
147 del t[k]
148
148
149 def _chain(a, b):
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
150 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
151 t = a.copy()
152 for k, v in b.iteritems():
152 for k, v in b.iteritems():
153 if v in t:
153 if v in t:
154 t[k] = t[v]
154 t[k] = t[v]
155 else:
155 else:
156 t[k] = v
156 t[k] = v
157 return t
157 return t
158
158
159 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
162
162
163 for f in fctx.ancestors():
163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
164 if am.get(f.path(), None) == f.filenode():
165 return f
165 return f
166 if not f.isintroducedafter(limit):
166 if not f.isintroducedafter(limit):
167 return None
167 return None
168
168
169 def _dirstatecopies(repo, match=None):
169 def _dirstatecopies(repo, match=None):
170 ds = repo.dirstate
170 ds = repo.dirstate
171 c = ds.copies().copy()
171 c = ds.copies().copy()
172 for k in list(c):
172 for k in list(c):
173 if ds[k] not in 'anm' or (match and not match(k)):
173 if ds[k] not in 'anm' or (match and not match(k)):
174 del c[k]
174 del c[k]
175 return c
175 return c
176
176
177 def _computeforwardmissing(a, b, match=None):
177 def _computeforwardmissing(a, b, match=None):
178 """Computes which files are in b but not a.
178 """Computes which files are in b but not a.
179 This is its own function so extensions can easily wrap this call to see what
179 This is its own function so extensions can easily wrap this call to see what
180 files _forwardcopies is about to process.
180 files _forwardcopies is about to process.
181 """
181 """
182 ma = a.manifest()
182 ma = a.manifest()
183 mb = b.manifest()
183 mb = b.manifest()
184 return mb.filesnotin(ma, match=match)
184 return mb.filesnotin(ma, match=match)
185
185
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    return readfrom in ('changeset-only', 'compatibility')
190
190
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    copies = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        # share the ancestry context across traced files to avoid
        # recomputing it for each one
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        srcfctx = _tracefile(fctx, am, limit)
        if srcfctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % srcfctx._path)
            copies[f] = srcfctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return copies
249
249
def _changesetforwardcopies(a, b, match):
    """Like _committedforwardcopies(), but reads copy information stored in
    changesets (p1copies()/p2copies()) instead of filelogs."""
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    # map each parent rev to the missing revs it is a parent of
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            children.setdefault(p, []).append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r, i1, copies = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            for dst in copies2:
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst not in copies:
                    # If it was copied on the p1 side, leave it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies2[dst]
        if r == b.rev():
            _filter(a, b, copies)
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            heapq.heappush(work, (c, parent, newcopies))
    assert False
321
315
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)
        # trace the committed part first, then layer the dirstate
        # copies on top of it
        committed = _committedforwardcopies(a, b.p1(), match)
        return _chainandfilter(a, b, committed,
                               _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)
336
330
def _backwardrenames(a, b, match):
    """find {src@b: dst@a} rename mapping where b is an ancestor of a,
    by reversing the forward copies from b to a (copies are dropped,
    only renames survive)"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    # items() instead of the Python-2-only iteritems(); sorted() keeps the
    # "arbitrary pick" among 1:n renames deterministic
    for k, v in sorted(f.items()):
        if match and not match(v):
            continue
        # remove copies (source still exists in a, so it was not a rename)
        if v in a:
            continue
        r[v] = k
    return r
357
351
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    dbg = repo.ui.debug
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        dbg('debug.copies: searching copies from %s to %s\n'
            % (x, y))
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: trace forwards only
        if debug:
            dbg('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: trace backwards only
        if debug:
            dbg('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    # unrelated: go back from x to the common ancestor, then forward to y
    if debug:
        dbg('debug.copies: search mode: combined\n')
    return _chainandfilter(x, y, _backwardrenames(x, anc, match=match),
                           _forwardcopies(anc, y, match=match))
380
374
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    return _fullcopytracing(repo, c1, c2, base)
459
453
def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        return False
    sourcecommitlimit = repo.ui.configint('experimental',
                                          'copytrace.sourcecommitlimit')
    commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
    return commits < sourcecommitlimit
477
471
478 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
472 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
479 copy, renamedelete):
473 copy, renamedelete):
480 if src not in m2:
474 if src not in m2:
481 # deleted on side 2
475 # deleted on side 2
482 if src not in m1:
476 if src not in m1:
483 # renamed on side 1, deleted on side 2
477 # renamed on side 1, deleted on side 2
484 renamedelete[src] = dsts1
478 renamedelete[src] = dsts1
485 elif m2[src] != mb[src]:
479 elif m2[src] != mb[src]:
486 if not _related(c2[src], base[src]):
480 if not _related(c2[src], base[src]):
487 return
481 return
488 # modified on side 2
482 # modified on side 2
489 for dst in dsts1:
483 for dst in dsts1:
490 if dst not in m2:
484 if dst not in m2:
491 # dst not added on side 2 (handle as regular
485 # dst not added on side 2 (handle as regular
492 # "both created" case in manifestmerge otherwise)
486 # "both created" case in manifestmerge otherwise)
493 copy[dst] = src
487 copy[dst] = src
494
488
495 def _fullcopytracing(repo, c1, c2, base):
489 def _fullcopytracing(repo, c1, c2, base):
496 """ The full copytracing algorithm which finds all the new files that were
490 """ The full copytracing algorithm which finds all the new files that were
497 added from merge base up to the top commit and for each file it checks if
491 added from merge base up to the top commit and for each file it checks if
498 this file was copied from another file.
492 this file was copied from another file.
499
493
500 This is pretty slow when a lot of changesets are involved but will track all
494 This is pretty slow when a lot of changesets are involved but will track all
501 the copies.
495 the copies.
502 """
496 """
503 m1 = c1.manifest()
497 m1 = c1.manifest()
504 m2 = c2.manifest()
498 m2 = c2.manifest()
505 mb = base.manifest()
499 mb = base.manifest()
506
500
507 copies1 = pathcopies(base, c1)
501 copies1 = pathcopies(base, c1)
508 copies2 = pathcopies(base, c2)
502 copies2 = pathcopies(base, c2)
509
503
510 inversecopies1 = {}
504 inversecopies1 = {}
511 inversecopies2 = {}
505 inversecopies2 = {}
512 for dst, src in copies1.items():
506 for dst, src in copies1.items():
513 inversecopies1.setdefault(src, []).append(dst)
507 inversecopies1.setdefault(src, []).append(dst)
514 for dst, src in copies2.items():
508 for dst, src in copies2.items():
515 inversecopies2.setdefault(src, []).append(dst)
509 inversecopies2.setdefault(src, []).append(dst)
516
510
517 copy = {}
511 copy = {}
518 diverge = {}
512 diverge = {}
519 renamedelete = {}
513 renamedelete = {}
520 allsources = set(inversecopies1) | set(inversecopies2)
514 allsources = set(inversecopies1) | set(inversecopies2)
521 for src in allsources:
515 for src in allsources:
522 dsts1 = inversecopies1.get(src)
516 dsts1 = inversecopies1.get(src)
523 dsts2 = inversecopies2.get(src)
517 dsts2 = inversecopies2.get(src)
524 if dsts1 and dsts2:
518 if dsts1 and dsts2:
525 # copied/renamed on both sides
519 # copied/renamed on both sides
526 if src not in m1 and src not in m2:
520 if src not in m1 and src not in m2:
527 # renamed on both sides
521 # renamed on both sides
528 dsts1 = set(dsts1)
522 dsts1 = set(dsts1)
529 dsts2 = set(dsts2)
523 dsts2 = set(dsts2)
530 # If there's some overlap in the rename destinations, we
524 # If there's some overlap in the rename destinations, we
531 # consider it not divergent. For example, if side 1 copies 'a'
525 # consider it not divergent. For example, if side 1 copies 'a'
532 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
526 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
533 # and 'd' and deletes 'a'.
527 # and 'd' and deletes 'a'.
534 if dsts1 & dsts2:
528 if dsts1 & dsts2:
535 for dst in (dsts1 & dsts2):
529 for dst in (dsts1 & dsts2):
536 copy[dst] = src
530 copy[dst] = src
537 else:
531 else:
538 diverge[src] = sorted(dsts1 | dsts2)
532 diverge[src] = sorted(dsts1 | dsts2)
539 elif src in m1 and src in m2:
533 elif src in m1 and src in m2:
540 # copied on both sides
534 # copied on both sides
541 dsts1 = set(dsts1)
535 dsts1 = set(dsts1)
542 dsts2 = set(dsts2)
536 dsts2 = set(dsts2)
543 for dst in (dsts1 & dsts2):
537 for dst in (dsts1 & dsts2):
544 copy[dst] = src
538 copy[dst] = src
545 # TODO: Handle cases where it was renamed on one side and copied
539 # TODO: Handle cases where it was renamed on one side and copied
546 # on the other side
540 # on the other side
547 elif dsts1:
541 elif dsts1:
548 # copied/renamed only on side 1
542 # copied/renamed only on side 1
549 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
543 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
550 copy, renamedelete)
544 copy, renamedelete)
551 elif dsts2:
545 elif dsts2:
552 # copied/renamed only on side 2
546 # copied/renamed only on side 2
553 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
547 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
554 copy, renamedelete)
548 copy, renamedelete)
555
549
556 renamedeleteset = set()
550 renamedeleteset = set()
557 divergeset = set()
551 divergeset = set()
558 for dsts in diverge.values():
552 for dsts in diverge.values():
559 divergeset.update(dsts)
553 divergeset.update(dsts)
560 for dsts in renamedelete.values():
554 for dsts in renamedelete.values():
561 renamedeleteset.update(dsts)
555 renamedeleteset.update(dsts)
562
556
563 # find interesting file sets from manifests
557 # find interesting file sets from manifests
564 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
558 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
565 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
559 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
566 u1 = sorted(addedinm1 - addedinm2)
560 u1 = sorted(addedinm1 - addedinm2)
567 u2 = sorted(addedinm2 - addedinm1)
561 u2 = sorted(addedinm2 - addedinm1)
568
562
569 header = " unmatched files in %s"
563 header = " unmatched files in %s"
570 if u1:
564 if u1:
571 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
565 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
572 if u2:
566 if u2:
573 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
567 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
574
568
575 fullcopy = copies1.copy()
569 fullcopy = copies1.copy()
576 fullcopy.update(copies2)
570 fullcopy.update(copies2)
577 if not fullcopy:
571 if not fullcopy:
578 return copy, {}, diverge, renamedelete, {}
572 return copy, {}, diverge, renamedelete, {}
579
573
580 if repo.ui.debugflag:
574 if repo.ui.debugflag:
581 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
575 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
582 "% = renamed and deleted):\n")
576 "% = renamed and deleted):\n")
583 for f in sorted(fullcopy):
577 for f in sorted(fullcopy):
584 note = ""
578 note = ""
585 if f in copy:
579 if f in copy:
586 note += "*"
580 note += "*"
587 if f in divergeset:
581 if f in divergeset:
588 note += "!"
582 note += "!"
589 if f in renamedeleteset:
583 if f in renamedeleteset:
590 note += "%"
584 note += "%"
591 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
585 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
592 note))
586 note))
593 del divergeset
587 del divergeset
594
588
595 repo.ui.debug(" checking for directory renames\n")
589 repo.ui.debug(" checking for directory renames\n")
596
590
597 # generate a directory move map
591 # generate a directory move map
598 d1, d2 = c1.dirs(), c2.dirs()
592 d1, d2 = c1.dirs(), c2.dirs()
599 invalid = set()
593 invalid = set()
600 dirmove = {}
594 dirmove = {}
601
595
602 # examine each file copy for a potential directory move, which is
596 # examine each file copy for a potential directory move, which is
603 # when all the files in a directory are moved to a new directory
597 # when all the files in a directory are moved to a new directory
604 for dst, src in fullcopy.iteritems():
598 for dst, src in fullcopy.iteritems():
605 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
599 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
606 if dsrc in invalid:
600 if dsrc in invalid:
607 # already seen to be uninteresting
601 # already seen to be uninteresting
608 continue
602 continue
609 elif dsrc in d1 and ddst in d1:
603 elif dsrc in d1 and ddst in d1:
610 # directory wasn't entirely moved locally
604 # directory wasn't entirely moved locally
611 invalid.add(dsrc)
605 invalid.add(dsrc)
612 elif dsrc in d2 and ddst in d2:
606 elif dsrc in d2 and ddst in d2:
613 # directory wasn't entirely moved remotely
607 # directory wasn't entirely moved remotely
614 invalid.add(dsrc)
608 invalid.add(dsrc)
615 elif dsrc in dirmove and dirmove[dsrc] != ddst:
609 elif dsrc in dirmove and dirmove[dsrc] != ddst:
616 # files from the same directory moved to two different places
610 # files from the same directory moved to two different places
617 invalid.add(dsrc)
611 invalid.add(dsrc)
618 else:
612 else:
619 # looks good so far
613 # looks good so far
620 dirmove[dsrc] = ddst
614 dirmove[dsrc] = ddst
621
615
622 for i in invalid:
616 for i in invalid:
623 if i in dirmove:
617 if i in dirmove:
624 del dirmove[i]
618 del dirmove[i]
625 del d1, d2, invalid
619 del d1, d2, invalid
626
620
627 if not dirmove:
621 if not dirmove:
628 return copy, {}, diverge, renamedelete, {}
622 return copy, {}, diverge, renamedelete, {}
629
623
630 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
624 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
631
625
632 for d in dirmove:
626 for d in dirmove:
633 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
627 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
634 (d, dirmove[d]))
628 (d, dirmove[d]))
635
629
636 movewithdir = {}
630 movewithdir = {}
637 # check unaccounted nonoverlapping files against directory moves
631 # check unaccounted nonoverlapping files against directory moves
638 for f in u1 + u2:
632 for f in u1 + u2:
639 if f not in fullcopy:
633 if f not in fullcopy:
640 for d in dirmove:
634 for d in dirmove:
641 if f.startswith(d):
635 if f.startswith(d):
642 # new file added in a directory that was moved, move it
636 # new file added in a directory that was moved, move it
643 df = dirmove[d] + f[len(d):]
637 df = dirmove[d] + f[len(d):]
644 if df not in copy:
638 if df not in copy:
645 movewithdir[f] = df
639 movewithdir[f] = df
646 repo.ui.debug((" pending file src: '%s' -> "
640 repo.ui.debug((" pending file src: '%s' -> "
647 "dst: '%s'\n") % (f, df))
641 "dst: '%s'\n") % (f, df))
648 break
642 break
649
643
650 return copy, movewithdir, diverge, renamedelete, dirmove
644 return copy, movewithdir, diverge, renamedelete, dirmove
651
645
652 def _heuristicscopytracing(repo, c1, c2, base):
646 def _heuristicscopytracing(repo, c1, c2, base):
653 """ Fast copytracing using filename heuristics
647 """ Fast copytracing using filename heuristics
654
648
655 Assumes that moves or renames are of following two types:
649 Assumes that moves or renames are of following two types:
656
650
657 1) Inside a directory only (same directory name but different filenames)
651 1) Inside a directory only (same directory name but different filenames)
658 2) Move from one directory to another
652 2) Move from one directory to another
659 (same filenames but different directory names)
653 (same filenames but different directory names)
660
654
661 Works only when there are no merge commits in the "source branch".
655 Works only when there are no merge commits in the "source branch".
662 Source branch is commits from base up to c2 not including base.
656 Source branch is commits from base up to c2 not including base.
663
657
664 If merge is involved it fallbacks to _fullcopytracing().
658 If merge is involved it fallbacks to _fullcopytracing().
665
659
666 Can be used by setting the following config:
660 Can be used by setting the following config:
667
661
668 [experimental]
662 [experimental]
669 copytrace = heuristics
663 copytrace = heuristics
670
664
671 In some cases the copy/move candidates found by heuristics can be very large
665 In some cases the copy/move candidates found by heuristics can be very large
672 in number and that will make the algorithm slow. The number of possible
666 in number and that will make the algorithm slow. The number of possible
673 candidates to check can be limited by using the config
667 candidates to check can be limited by using the config
674 `experimental.copytrace.movecandidateslimit` which defaults to 100.
668 `experimental.copytrace.movecandidateslimit` which defaults to 100.
675 """
669 """
676
670
677 if c1.rev() is None:
671 if c1.rev() is None:
678 c1 = c1.p1()
672 c1 = c1.p1()
679 if c2.rev() is None:
673 if c2.rev() is None:
680 c2 = c2.p1()
674 c2 = c2.p1()
681
675
682 copies = {}
676 copies = {}
683
677
684 changedfiles = set()
678 changedfiles = set()
685 m1 = c1.manifest()
679 m1 = c1.manifest()
686 if not repo.revs('%d::%d', base.rev(), c2.rev()):
680 if not repo.revs('%d::%d', base.rev(), c2.rev()):
687 # If base is not in c2 branch, we switch to fullcopytracing
681 # If base is not in c2 branch, we switch to fullcopytracing
688 repo.ui.debug("switching to full copytracing as base is not "
682 repo.ui.debug("switching to full copytracing as base is not "
689 "an ancestor of c2\n")
683 "an ancestor of c2\n")
690 return _fullcopytracing(repo, c1, c2, base)
684 return _fullcopytracing(repo, c1, c2, base)
691
685
692 ctx = c2
686 ctx = c2
693 while ctx != base:
687 while ctx != base:
694 if len(ctx.parents()) == 2:
688 if len(ctx.parents()) == 2:
695 # To keep things simple let's not handle merges
689 # To keep things simple let's not handle merges
696 repo.ui.debug("switching to full copytracing because of merges\n")
690 repo.ui.debug("switching to full copytracing because of merges\n")
697 return _fullcopytracing(repo, c1, c2, base)
691 return _fullcopytracing(repo, c1, c2, base)
698 changedfiles.update(ctx.files())
692 changedfiles.update(ctx.files())
699 ctx = ctx.p1()
693 ctx = ctx.p1()
700
694
701 cp = _forwardcopies(base, c2)
695 cp = _forwardcopies(base, c2)
702 for dst, src in cp.iteritems():
696 for dst, src in cp.iteritems():
703 if src in m1:
697 if src in m1:
704 copies[dst] = src
698 copies[dst] = src
705
699
706 # file is missing if it isn't present in the destination, but is present in
700 # file is missing if it isn't present in the destination, but is present in
707 # the base and present in the source.
701 # the base and present in the source.
708 # Presence in the base is important to exclude added files, presence in the
702 # Presence in the base is important to exclude added files, presence in the
709 # source is important to exclude removed files.
703 # source is important to exclude removed files.
710 filt = lambda f: f not in m1 and f in base and f in c2
704 filt = lambda f: f not in m1 and f in base and f in c2
711 missingfiles = [f for f in changedfiles if filt(f)]
705 missingfiles = [f for f in changedfiles if filt(f)]
712
706
713 if missingfiles:
707 if missingfiles:
714 basenametofilename = collections.defaultdict(list)
708 basenametofilename = collections.defaultdict(list)
715 dirnametofilename = collections.defaultdict(list)
709 dirnametofilename = collections.defaultdict(list)
716
710
717 for f in m1.filesnotin(base.manifest()):
711 for f in m1.filesnotin(base.manifest()):
718 basename = os.path.basename(f)
712 basename = os.path.basename(f)
719 dirname = os.path.dirname(f)
713 dirname = os.path.dirname(f)
720 basenametofilename[basename].append(f)
714 basenametofilename[basename].append(f)
721 dirnametofilename[dirname].append(f)
715 dirnametofilename[dirname].append(f)
722
716
723 for f in missingfiles:
717 for f in missingfiles:
724 basename = os.path.basename(f)
718 basename = os.path.basename(f)
725 dirname = os.path.dirname(f)
719 dirname = os.path.dirname(f)
726 samebasename = basenametofilename[basename]
720 samebasename = basenametofilename[basename]
727 samedirname = dirnametofilename[dirname]
721 samedirname = dirnametofilename[dirname]
728 movecandidates = samebasename + samedirname
722 movecandidates = samebasename + samedirname
729 # f is guaranteed to be present in c2, that's why
723 # f is guaranteed to be present in c2, that's why
730 # c2.filectx(f) won't fail
724 # c2.filectx(f) won't fail
731 f2 = c2.filectx(f)
725 f2 = c2.filectx(f)
732 # we can have a lot of candidates which can slow down the heuristics
726 # we can have a lot of candidates which can slow down the heuristics
733 # config value to limit the number of candidates moves to check
727 # config value to limit the number of candidates moves to check
734 maxcandidates = repo.ui.configint('experimental',
728 maxcandidates = repo.ui.configint('experimental',
735 'copytrace.movecandidateslimit')
729 'copytrace.movecandidateslimit')
736
730
737 if len(movecandidates) > maxcandidates:
731 if len(movecandidates) > maxcandidates:
738 repo.ui.status(_("skipping copytracing for '%s', more "
732 repo.ui.status(_("skipping copytracing for '%s', more "
739 "candidates than the limit: %d\n")
733 "candidates than the limit: %d\n")
740 % (f, len(movecandidates)))
734 % (f, len(movecandidates)))
741 continue
735 continue
742
736
743 for candidate in movecandidates:
737 for candidate in movecandidates:
744 f1 = c1.filectx(candidate)
738 f1 = c1.filectx(candidate)
745 if _related(f1, f2):
739 if _related(f1, f2):
746 # if there are a few related copies then we'll merge
740 # if there are a few related copies then we'll merge
747 # changes into all of them. This matches the behaviour
741 # changes into all of them. This matches the behaviour
748 # of upstream copytracing
742 # of upstream copytracing
749 copies[candidate] = f
743 copies[candidate] = f
750
744
751 return copies, {}, {}, {}, {}
745 return copies, {}, {}, {}, {}
752
746
753 def _related(f1, f2):
747 def _related(f1, f2):
754 """return True if f1 and f2 filectx have a common ancestor
748 """return True if f1 and f2 filectx have a common ancestor
755
749
756 Walk back to common ancestor to see if the two files originate
750 Walk back to common ancestor to see if the two files originate
757 from the same file. Since workingfilectx's rev() is None it messes
751 from the same file. Since workingfilectx's rev() is None it messes
758 up the integer comparison logic, hence the pre-step check for
752 up the integer comparison logic, hence the pre-step check for
759 None (f1 and f2 can only be workingfilectx's initially).
753 None (f1 and f2 can only be workingfilectx's initially).
760 """
754 """
761
755
762 if f1 == f2:
756 if f1 == f2:
763 return True # a match
757 return True # a match
764
758
765 g1, g2 = f1.ancestors(), f2.ancestors()
759 g1, g2 = f1.ancestors(), f2.ancestors()
766 try:
760 try:
767 f1r, f2r = f1.linkrev(), f2.linkrev()
761 f1r, f2r = f1.linkrev(), f2.linkrev()
768
762
769 if f1r is None:
763 if f1r is None:
770 f1 = next(g1)
764 f1 = next(g1)
771 if f2r is None:
765 if f2r is None:
772 f2 = next(g2)
766 f2 = next(g2)
773
767
774 while True:
768 while True:
775 f1r, f2r = f1.linkrev(), f2.linkrev()
769 f1r, f2r = f1.linkrev(), f2.linkrev()
776 if f1r > f2r:
770 if f1r > f2r:
777 f1 = next(g1)
771 f1 = next(g1)
778 elif f2r > f1r:
772 elif f2r > f1r:
779 f2 = next(g2)
773 f2 = next(g2)
780 else: # f1 and f2 point to files in the same linkrev
774 else: # f1 and f2 point to files in the same linkrev
781 return f1 == f2 # true if they point to the same file
775 return f1 == f2 # true if they point to the same file
782 except StopIteration:
776 except StopIteration:
783 return False
777 return False
784
778
785 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
779 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
786 """reproduce copies from fromrev to rev in the dirstate
780 """reproduce copies from fromrev to rev in the dirstate
787
781
788 If skiprev is specified, it's a revision that should be used to
782 If skiprev is specified, it's a revision that should be used to
789 filter copy records. Any copies that occur between fromrev and
783 filter copy records. Any copies that occur between fromrev and
790 skiprev will not be duplicated, even if they appear in the set of
784 skiprev will not be duplicated, even if they appear in the set of
791 copies between fromrev and rev.
785 copies between fromrev and rev.
792 """
786 """
793 exclude = {}
787 exclude = {}
794 ctraceconfig = repo.ui.config('experimental', 'copytrace')
788 ctraceconfig = repo.ui.config('experimental', 'copytrace')
795 bctrace = stringutil.parsebool(ctraceconfig)
789 bctrace = stringutil.parsebool(ctraceconfig)
796 if (skiprev is not None and
790 if (skiprev is not None and
797 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
791 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
798 # copytrace='off' skips this line, but not the entire function because
792 # copytrace='off' skips this line, but not the entire function because
799 # the line below is O(size of the repo) during a rebase, while the rest
793 # the line below is O(size of the repo) during a rebase, while the rest
800 # of the function is much faster (and is required for carrying copy
794 # of the function is much faster (and is required for carrying copy
801 # metadata across the rebase anyway).
795 # metadata across the rebase anyway).
802 exclude = pathcopies(repo[fromrev], repo[skiprev])
796 exclude = pathcopies(repo[fromrev], repo[skiprev])
803 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
797 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
804 if dst in exclude:
798 if dst in exclude:
805 continue
799 continue
806 if dst in wctx:
800 if dst in wctx:
807 wctx[dst].markcopied(src)
801 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now