copies: split up _chain() in naive chaining and filtering steps...
Martin von Zweigbergk
r42565:f3d06d37 default
@@ -1,799 +1,805 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendant
31 ancestor of a or b but not both, except when a or b is a direct descendant
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 back far enough unless we also look at a and b as candidates.
106 back far enough unless we also look at a and b as candidates.
107 # This only occurs when a is a descendant of b or vice versa.
107 # This only occurs when a is a descendant of b or vice versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
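# Editor's illustration (not part of copies.py): the side-marking walk described
# in the comments above, run on a hand-built DAG.  'parentrevs' is a hypothetical
# stand-in for the changelog: {rev: (p1, p2)} with -1 meaning the null revision.
import heapq

def findlimit_sketch(parentrevs, a, b):
    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = max(a, b)  # stands in for node.wdirrev in the real code
    while interesting:
        r = -heapq.heappop(visit)
        for p in parentrevs[r]:
            if p == -1:
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1
    return min(limit, a, b)

# The amend scenario from the comment above: 0 is the root, 1 and 2 are its
# children, 3 is the temporary amend commit on top of 2; a=3 and b=0.
# findlimit_sketch({0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (2, -1)}, 3, 0)
# visits revs 3 and 2, ends with limit == 2, and returns min(2, 3, 0) == 0.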
@@ -110,42 +110,48 @@
-def _chain(src, dst, a, b):
-    """chain two sets of copies 'a' and 'b'"""
+def _chainandfilter(src, dst, a, b):
+    """chain two sets of copies 'a' and 'b' and filter result"""
 
     # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
     # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
     # following table (not including trivial cases). For example, case 2 is
     # where a file existed in 'src' and remained under that name in 'mid' and
     # then was renamed between 'mid' and 'dst'.
     #
     # case src mid dst result
     #   1   x   y   -    -
     #   2   x   y   y   x->y
     #   3   x   y   x    -
     #   4   x   y   z   x->z
     #   5   -   x   y    -
     #   6   x   x   y   x->y
+    #
+    # _chain() takes care of chaining the copies in 'a' and 'b', but it
+    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
+    # between 5 and 6, so it includes all cases in its result.
+    # Cases 1, 3, and 5 are then removed by _filter().
 
-    # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
-    # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
-    # overwritten later, and case 3 will be removed later.
+    t = _chain(a, b)
+    _filter(src, dst, t)
+    return t
+
+def _filter(src, dst, t):
+    """filters out invalid copies after chaining"""
+    for k, v in list(t.items()):
+        # remove copies from files that didn't exist
+        if v not in src:
+            del t[k]
+        # remove criss-crossed copies
+        elif k in src and v in dst:
+            del t[k]
+        # remove copies to files that were then removed
+        elif k not in dst:
+            del t[k]
+
+def _chain(a, b):
+    """chain two sets of copies 'a' and 'b'"""
     t = a.copy()
     for k, v in b.iteritems():
         if v in t:
-            # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
            t[k] = t[v]
         else:
-            # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
            t[k] = v
-
-    for k, v in list(t.items()):
-        # remove copies from files that didn't exist, i.e. case 5
-        if v not in src:
-            del t[k]
-        # remove criss-crossed copies, i.e. case 3
-        elif k in src and v in dst:
-            del t[k]
-        # remove copies to files that were then removed, i.e. case 1
-        # and file 'y' in cases 3 & 4 (in case of rename)
-        elif k not in dst:
-            del t[k]
-
-    return t
     return t
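# Editor's illustration (not part of copies.py): the split above, exercised with
# plain dicts.  '_chain_sketch'/'_filter_sketch' are hypothetical stand-ins, and
# 'src'/'dst' here are simple sets of filenames rather than real manifests.
def _chain_sketch(a, b):
    t = dict(a)
    for k, v in b.items():
        t[k] = t.get(v, v)      # follow the chain when 'v' was itself copied
    return t

def _filter_sketch(src, dst, t):
    for k, v in list(t.items()):
        if v not in src or (k in src and v in dst) or k not in dst:
            del t[k]            # cases 1, 3 and 5 from the table above

src = {'x'}                     # files in the source commit
dst = {'y', 'z'}                # files in the destination commit
a = {'y': 'x'}                  # src -> mid: 'x' copied to 'y'
b = {'z': 'y'}                  # mid -> dst: 'y' copied to 'z'
t = _chain_sketch(a, b)         # {'y': 'x', 'z': 'x'}
_filter_sketch(src, dst, t)     # both survive: 'x' is in src, 'y' and 'z' in dst
assert t == {'y': 'x', 'z': 'x'}
# Had 'y' been deleted before dst (case 1), _filter_sketch would drop 'y': 'x'.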
152
158
153 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
154 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
155 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
156
162
157 for f in fctx.ancestors():
163 for f in fctx.ancestors():
158 if am.get(f.path(), None) == f.filenode():
164 if am.get(f.path(), None) == f.filenode():
159 return f
165 return f
160 if not f.isintroducedafter(limit):
166 if not f.isintroducedafter(limit):
161 return None
167 return None
162
168
163 def _dirstatecopies(repo, match=None):
169 def _dirstatecopies(repo, match=None):
164 ds = repo.dirstate
170 ds = repo.dirstate
165 c = ds.copies().copy()
171 c = ds.copies().copy()
166 for k in list(c):
172 for k in list(c):
167 if ds[k] not in 'anm' or (match and not match(k)):
173 if ds[k] not in 'anm' or (match and not match(k)):
168 del c[k]
174 del c[k]
169 return c
175 return c
170
176
171 def _computeforwardmissing(a, b, match=None):
177 def _computeforwardmissing(a, b, match=None):
172 """Computes which files are in b but not a.
178 """Computes which files are in b but not a.
173 This is its own function so extensions can easily wrap this call to see what
179 This is its own function so extensions can easily wrap this call to see what
174 files _forwardcopies is about to process.
180 files _forwardcopies is about to process.
175 """
181 """
176 ma = a.manifest()
182 ma = a.manifest()
177 mb = b.manifest()
183 mb = b.manifest()
178 return mb.filesnotin(ma, match=match)
184 return mb.filesnotin(ma, match=match)
179
185
180 def usechangesetcentricalgo(repo):
186 def usechangesetcentricalgo(repo):
181 """Checks if we should use changeset-centric copy algorithms"""
187 """Checks if we should use changeset-centric copy algorithms"""
182 return (repo.ui.config('experimental', 'copies.read-from') in
188 return (repo.ui.config('experimental', 'copies.read-from') in
183 ('changeset-only', 'compatibility'))
189 ('changeset-only', 'compatibility'))
184
190
185 def _committedforwardcopies(a, b, match):
191 def _committedforwardcopies(a, b, match):
186 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
187 # files might have to be traced back to the fctx parent of the last
193 # files might have to be traced back to the fctx parent of the last
188 # one-side-only changeset, but not further back than that
194 # one-side-only changeset, but not further back than that
189 repo = a._repo
195 repo = a._repo
190
196
191 if usechangesetcentricalgo(repo):
197 if usechangesetcentricalgo(repo):
192 return _changesetforwardcopies(a, b, match)
198 return _changesetforwardcopies(a, b, match)
193
199
194 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
195 dbg = repo.ui.debug
201 dbg = repo.ui.debug
196 if debug:
202 if debug:
197 dbg('debug.copies: looking into rename from %s to %s\n'
203 dbg('debug.copies: looking into rename from %s to %s\n'
198 % (a, b))
204 % (a, b))
199 limit = _findlimit(repo, a, b)
205 limit = _findlimit(repo, a, b)
200 if debug:
206 if debug:
201 dbg('debug.copies: search limit: %d\n' % limit)
207 dbg('debug.copies: search limit: %d\n' % limit)
202 am = a.manifest()
208 am = a.manifest()
203
209
204 # find where new files came from
210 # find where new files came from
205 # we currently don't try to find where old files went, too expensive
211 # we currently don't try to find where old files went, too expensive
206 # this means we can miss a case like 'hg rm b; hg cp a b'
212 # this means we can miss a case like 'hg rm b; hg cp a b'
207 cm = {}
213 cm = {}
208
214
209 # Computing the forward missing is quite expensive on large manifests, since
215 # Computing the forward missing is quite expensive on large manifests, since
210 # it compares the entire manifests. We can optimize it in the common use
216 # it compares the entire manifests. We can optimize it in the common use
211 # case of computing what copies are in a commit versus its parent (like
217 # case of computing what copies are in a commit versus its parent (like
212 # during a rebase or histedit). Note, we exclude merge commits from this
218 # during a rebase or histedit). Note, we exclude merge commits from this
213 # optimization, since the ctx.files() for a merge commit is not correct for
219 # optimization, since the ctx.files() for a merge commit is not correct for
214 # this comparison.
220 # this comparison.
215 forwardmissingmatch = match
221 forwardmissingmatch = match
216 if b.p1() == a and b.p2().node() == node.nullid:
222 if b.p1() == a and b.p2().node() == node.nullid:
217 filesmatcher = matchmod.exact(b.files())
223 filesmatcher = matchmod.exact(b.files())
218 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
219 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
220
226
221 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
222
228
223 if debug:
229 if debug:
224 dbg('debug.copies: missing files to search: %d\n' % len(missing))
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
225
231
226 for f in sorted(missing):
232 for f in sorted(missing):
227 if debug:
233 if debug:
228 dbg('debug.copies: tracing file: %s\n' % f)
234 dbg('debug.copies: tracing file: %s\n' % f)
229 fctx = b[f]
235 fctx = b[f]
230 fctx._ancestrycontext = ancestrycontext
236 fctx._ancestrycontext = ancestrycontext
231
237
232 if debug:
238 if debug:
233 start = util.timer()
239 start = util.timer()
234 ofctx = _tracefile(fctx, am, limit)
240 ofctx = _tracefile(fctx, am, limit)
235 if ofctx:
241 if ofctx:
236 if debug:
242 if debug:
237 dbg('debug.copies: rename of: %s\n' % ofctx._path)
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
238 cm[f] = ofctx.path()
244 cm[f] = ofctx.path()
239 if debug:
245 if debug:
240 dbg('debug.copies: time: %f seconds\n'
246 dbg('debug.copies: time: %f seconds\n'
241 % (util.timer() - start))
247 % (util.timer() - start))
242 return cm
248 return cm
243
249
244 def _changesetforwardcopies(a, b, match):
250 def _changesetforwardcopies(a, b, match):
245 if a.rev() == node.nullrev:
251 if a.rev() == node.nullrev:
246 return {}
252 return {}
247
253
248 repo = a.repo()
254 repo = a.repo()
249 children = {}
255 children = {}
250 cl = repo.changelog
256 cl = repo.changelog
251 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
252 for r in missingrevs:
258 for r in missingrevs:
253 for p in cl.parentrevs(r):
259 for p in cl.parentrevs(r):
254 if p == node.nullrev:
260 if p == node.nullrev:
255 continue
261 continue
256 if p not in children:
262 if p not in children:
257 children[p] = [r]
263 children[p] = [r]
258 else:
264 else:
259 children[p].append(r)
265 children[p].append(r)
260
266
261 roots = set(children) - set(missingrevs)
267 roots = set(children) - set(missingrevs)
262 # 'work' contains 3-tuples of a (revision number, parent number, copies).
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
263 # The parent number is only used for knowing which parent the copies dict
269 # The parent number is only used for knowing which parent the copies dict
264 # came from.
270 # came from.
265 work = [(r, 1, {}) for r in roots]
271 work = [(r, 1, {}) for r in roots]
266 heapq.heapify(work)
272 heapq.heapify(work)
267 while work:
273 while work:
268 r, i1, copies1 = heapq.heappop(work)
274 r, i1, copies1 = heapq.heappop(work)
269 if work and work[0][0] == r:
275 if work and work[0][0] == r:
270 # We are tracing copies from both parents
276 # We are tracing copies from both parents
271 r, i2, copies2 = heapq.heappop(work)
277 r, i2, copies2 = heapq.heappop(work)
272 copies = {}
278 copies = {}
273 ctx = repo[r]
279 ctx = repo[r]
274 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
280 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
275 allcopies = set(copies1) | set(copies2)
281 allcopies = set(copies1) | set(copies2)
276 # TODO: perhaps this filtering should be done as long as ctx
282 # TODO: perhaps this filtering should be done as long as ctx
277 # is a merge, whether or not we're tracing from both parents.
283 # is a merge, whether or not we're tracing from both parents.
278 for dst in allcopies:
284 for dst in allcopies:
279 if not match(dst):
285 if not match(dst):
280 continue
286 continue
281 if dst not in copies2:
287 if dst not in copies2:
282 # Copied on p1 side: mark as copy from p1 side if it didn't
288 # Copied on p1 side: mark as copy from p1 side if it didn't
283 # already exist on p2 side
289 # already exist on p2 side
284 if dst not in p2man:
290 if dst not in p2man:
285 copies[dst] = copies1[dst]
291 copies[dst] = copies1[dst]
286 elif dst not in copies1:
292 elif dst not in copies1:
287 # Copied on p2 side: mark as copy from p2 side if it didn't
293 # Copied on p2 side: mark as copy from p2 side if it didn't
288 # already exist on p1 side
294 # already exist on p1 side
289 if dst not in p1man:
295 if dst not in p1man:
290 copies[dst] = copies2[dst]
296 copies[dst] = copies2[dst]
291 else:
297 else:
292 # Copied on both sides: mark as copy from p1 side
298 # Copied on both sides: mark as copy from p1 side
293 copies[dst] = copies1[dst]
299 copies[dst] = copies1[dst]
294 else:
300 else:
295 copies = copies1
301 copies = copies1
296 if r == b.rev():
302 if r == b.rev():
297 return copies
303 return copies
298 for c in children[r]:
304 for c in children[r]:
299 childctx = repo[c]
305 childctx = repo[c]
300 if r == childctx.p1().rev():
306 if r == childctx.p1().rev():
301 parent = 1
307 parent = 1
302 childcopies = childctx.p1copies()
308 childcopies = childctx.p1copies()
303 else:
309 else:
304 assert r == childctx.p2().rev()
310 assert r == childctx.p2().rev()
305 parent = 2
311 parent = 2
306 childcopies = childctx.p2copies()
312 childcopies = childctx.p2copies()
307 if not match.always():
313 if not match.always():
308 childcopies = {dst: src for dst, src in childcopies.items()
314 childcopies = {dst: src for dst, src in childcopies.items()
309 if match(dst)}
315 if match(dst)}
310 childcopies = _chain(a, childctx, copies, childcopies)
316 childcopies = _chainandfilter(a, childctx, copies, childcopies)
311 heapq.heappush(work, (c, parent, childcopies))
317 heapq.heappush(work, (c, parent, childcopies))
312 assert False
318 assert False
313
319
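# Editor's illustration (not part of copies.py): how the parent -> children map
# and the heapified worklist drive the traversal above.  'parentrevs' is a toy
# stand-in for the changelog's parentrevs(); -1 means the null revision.
import heapq

parentrevs = {2: (1, -1), 3: (1, -1), 4: (2, 3)}   # revs reachable from b=4 but not from a=1
children = {}
for r in sorted(parentrevs):
    for p in parentrevs[r]:
        if p == -1:
            continue
        children.setdefault(p, []).append(r)
roots = set(children) - set(parentrevs)             # == {1}: where tracing starts
work = [(r, 1, {}) for r in roots]
heapq.heapify(work)
# children == {1: [2, 3], 2: [4], 3: [4]}.  Popping 'work' yields revisions in
# increasing order, so by the time the merge rev 4 is reached, the copies traced
# via each of its parents sit next to each other on the heap and can be combined.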
314 def _forwardcopies(a, b, match=None):
320 def _forwardcopies(a, b, match=None):
315 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
321 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
316
322
317 match = a.repo().narrowmatch(match)
323 match = a.repo().narrowmatch(match)
318 # check for working copy
324 # check for working copy
319 if b.rev() is None:
325 if b.rev() is None:
320 if a == b.p1():
326 if a == b.p1():
321 # short-circuit to avoid issues with merge states
327 # short-circuit to avoid issues with merge states
322 return _dirstatecopies(b._repo, match)
328 return _dirstatecopies(b._repo, match)
323
329
324 cm = _committedforwardcopies(a, b.p1(), match)
330 cm = _committedforwardcopies(a, b.p1(), match)
325 # combine copies from dirstate if necessary
331 # combine copies from dirstate if necessary
326 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
332 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
327 return _committedforwardcopies(a, b, match)
333 return _committedforwardcopies(a, b, match)
328
334
329 def _backwardrenames(a, b, match):
335 def _backwardrenames(a, b, match):
330 if a._repo.ui.config('experimental', 'copytrace') == 'off':
336 if a._repo.ui.config('experimental', 'copytrace') == 'off':
331 return {}
337 return {}
332
338
333 # Even though we're not taking copies into account, 1:n rename situations
339 # Even though we're not taking copies into account, 1:n rename situations
334 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
340 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
335 # arbitrarily pick one of the renames.
341 # arbitrarily pick one of the renames.
336 # We don't want to pass in "match" here, since that would filter
342 # We don't want to pass in "match" here, since that would filter
337 # the destination by it. Since we're reversing the copies, we want
343 # the destination by it. Since we're reversing the copies, we want
338 # to filter the source instead.
344 # to filter the source instead.
339 f = _forwardcopies(b, a)
345 f = _forwardcopies(b, a)
340 r = {}
346 r = {}
341 for k, v in sorted(f.iteritems()):
347 for k, v in sorted(f.iteritems()):
342 if match and not match(v):
348 if match and not match(v):
343 continue
349 continue
344 # remove copies
350 # remove copies
345 if v in a:
351 if v in a:
346 continue
352 continue
347 r[v] = k
353 r[v] = k
348 return r
354 return r
349
355
350 def pathcopies(x, y, match=None):
356 def pathcopies(x, y, match=None):
351 """find {dst@y: src@x} copy mapping for directed compare"""
357 """find {dst@y: src@x} copy mapping for directed compare"""
352 repo = x._repo
358 repo = x._repo
353 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
359 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
354 if debug:
360 if debug:
355 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
361 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
356 % (x, y))
362 % (x, y))
357 if x == y or not x or not y:
363 if x == y or not x or not y:
358 return {}
364 return {}
359 a = y.ancestor(x)
365 a = y.ancestor(x)
360 if a == x:
366 if a == x:
361 if debug:
367 if debug:
362 repo.ui.debug('debug.copies: search mode: forward\n')
368 repo.ui.debug('debug.copies: search mode: forward\n')
363 return _forwardcopies(x, y, match=match)
369 return _forwardcopies(x, y, match=match)
364 if a == y:
370 if a == y:
365 if debug:
371 if debug:
366 repo.ui.debug('debug.copies: search mode: backward\n')
372 repo.ui.debug('debug.copies: search mode: backward\n')
367 return _backwardrenames(x, y, match=match)
373 return _backwardrenames(x, y, match=match)
368 if debug:
374 if debug:
369 repo.ui.debug('debug.copies: search mode: combined\n')
375 repo.ui.debug('debug.copies: search mode: combined\n')
370 return _chain(x, y, _backwardrenames(x, a, match=match),
376 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
371 _forwardcopies(a, y, match=match))
377 _forwardcopies(a, y, match=match))
372
378
373 def mergecopies(repo, c1, c2, base):
379 def mergecopies(repo, c1, c2, base):
374 """
380 """
375 Finds moves and copies between context c1 and c2 that are relevant for
381 Finds moves and copies between context c1 and c2 that are relevant for
376 merging. 'base' will be used as the merge base.
382 merging. 'base' will be used as the merge base.
377
383
378 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
384 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
379 files that were moved/copied in one merge parent and modified in another.
385 files that were moved/copied in one merge parent and modified in another.
380 For example:
386 For example:
381
387
382 o ---> 4 another commit
388 o ---> 4 another commit
383 |
389 |
384 | o ---> 3 commit that modifies a.txt
390 | o ---> 3 commit that modifies a.txt
385 | /
391 | /
386 o / ---> 2 commit that moves a.txt to b.txt
392 o / ---> 2 commit that moves a.txt to b.txt
387 |/
393 |/
388 o ---> 1 merge base
394 o ---> 1 merge base
389
395
390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
396 If we try to rebase revision 3 on revision 4, since there is no a.txt in
391 revision 4, and if the user has copytrace disabled, we print the following
397 revision 4, and if the user has copytrace disabled, we print the following
392 message:
398 message:
393
399
394 ```other changed <file> which local deleted```
400 ```other changed <file> which local deleted```
395
401
396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
402 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
397 "dirmove".
403 "dirmove".
398
404
399 "copy" is a mapping from destination name -> source name,
405 "copy" is a mapping from destination name -> source name,
400 where source is in c1 and destination is in c2 or vice-versa.
406 where source is in c1 and destination is in c2 or vice-versa.
401
407
402 "movewithdir" is a mapping from source name -> destination name,
408 "movewithdir" is a mapping from source name -> destination name,
403 where the file at source, present in one context but not the other,
409 where the file at source, present in one context but not the other,
404 needs to be moved to destination by the merge process, because the
410 needs to be moved to destination by the merge process, because the
405 other context moved the directory it is in.
411 other context moved the directory it is in.
406
412
407 "diverge" is a mapping of source name -> list of destination names
413 "diverge" is a mapping of source name -> list of destination names
408 for divergent renames.
414 for divergent renames.
409
415
410 "renamedelete" is a mapping of source name -> list of destination
416 "renamedelete" is a mapping of source name -> list of destination
411 names for files deleted in c1 that were renamed in c2 or vice-versa.
417 names for files deleted in c1 that were renamed in c2 or vice-versa.
412
418
413 "dirmove" is a mapping of detected source dir -> destination dir renames.
419 "dirmove" is a mapping of detected source dir -> destination dir renames.
414 This is needed for handling changes to new files previously grafted into
420 This is needed for handling changes to new files previously grafted into
415 renamed directories.
421 renamed directories.
416
422
417 This function calls different copytracing algorithms based on config.
423 This function calls different copytracing algorithms based on config.
418 """
424 """
419 # avoid silly behavior for update from empty dir
425 # avoid silly behavior for update from empty dir
420 if not c1 or not c2 or c1 == c2:
426 if not c1 or not c2 or c1 == c2:
421 return {}, {}, {}, {}, {}
427 return {}, {}, {}, {}, {}
422
428
423 narrowmatch = c1.repo().narrowmatch()
429 narrowmatch = c1.repo().narrowmatch()
424
430
425 # avoid silly behavior for parent -> working dir
431 # avoid silly behavior for parent -> working dir
426 if c2.node() is None and c1.node() == repo.dirstate.p1():
432 if c2.node() is None and c1.node() == repo.dirstate.p1():
427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
433 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
428
434
429 copytracing = repo.ui.config('experimental', 'copytrace')
435 copytracing = repo.ui.config('experimental', 'copytrace')
430 if stringutil.parsebool(copytracing) is False:
436 if stringutil.parsebool(copytracing) is False:
431 # stringutil.parsebool() returns None when it is unable to parse the
437 # stringutil.parsebool() returns None when it is unable to parse the
432 # value, so we should rely on making sure copytracing is on in such cases
438 # value, so we should rely on making sure copytracing is on in such cases
433 return {}, {}, {}, {}, {}
439 return {}, {}, {}, {}, {}
434
440
435 if usechangesetcentricalgo(repo):
441 if usechangesetcentricalgo(repo):
436 # The heuristics don't make sense when we need changeset-centric algos
442 # The heuristics don't make sense when we need changeset-centric algos
437 return _fullcopytracing(repo, c1, c2, base)
443 return _fullcopytracing(repo, c1, c2, base)
438
444
439 # Copy trace disabling is explicitly below the node == p1 logic above
445 # Copy trace disabling is explicitly below the node == p1 logic above
440 # because the logic above is required for a simple copy to be kept across a
446 # because the logic above is required for a simple copy to be kept across a
441 # rebase.
447 # rebase.
442 if copytracing == 'heuristics':
448 if copytracing == 'heuristics':
443 # Do full copytracing if only non-public revisions are involved as
449 # Do full copytracing if only non-public revisions are involved as
444 # that will be fast enough and will also cover the copies which could
450 # that will be fast enough and will also cover the copies which could
445 # be missed by heuristics
451 # be missed by heuristics
446 if _isfullcopytraceable(repo, c1, base):
452 if _isfullcopytraceable(repo, c1, base):
447 return _fullcopytracing(repo, c1, c2, base)
453 return _fullcopytracing(repo, c1, c2, base)
448 return _heuristicscopytracing(repo, c1, c2, base)
454 return _heuristicscopytracing(repo, c1, c2, base)
449 else:
455 else:
450 return _fullcopytracing(repo, c1, c2, base)
456 return _fullcopytracing(repo, c1, c2, base)
451
457
452 def _isfullcopytraceable(repo, c1, base):
458 def _isfullcopytraceable(repo, c1, base):
453 """ Checks that if base, source and destination are all no-public branches,
459 """ Checks that if base, source and destination are all no-public branches,
454 if yes let's use the full copytrace algorithm for increased capabilities
460 if yes let's use the full copytrace algorithm for increased capabilities
455 since it will be fast enough.
461 since it will be fast enough.
456
462
457 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
463 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
458 the number of changesets from c1 to base such that if the number of changesets
464 the number of changesets from c1 to base such that if the number of changesets
459 is more than the limit, the full copytracing algorithm won't be used.
465 is more than the limit, the full copytracing algorithm won't be used.
460 """
466 """
461 if c1.rev() is None:
467 if c1.rev() is None:
462 c1 = c1.p1()
468 c1 = c1.p1()
463 if c1.mutable() and base.mutable():
469 if c1.mutable() and base.mutable():
464 sourcecommitlimit = repo.ui.configint('experimental',
470 sourcecommitlimit = repo.ui.configint('experimental',
465 'copytrace.sourcecommitlimit')
471 'copytrace.sourcecommitlimit')
466 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
472 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
467 return commits < sourcecommitlimit
473 return commits < sourcecommitlimit
468 return False
474 return False
469
475
470 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
476 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
471 copy, renamedelete):
477 copy, renamedelete):
472 if src not in m2:
478 if src not in m2:
473 # deleted on side 2
479 # deleted on side 2
474 if src not in m1:
480 if src not in m1:
475 # renamed on side 1, deleted on side 2
481 # renamed on side 1, deleted on side 2
476 renamedelete[src] = dsts1
482 renamedelete[src] = dsts1
477 elif m2[src] != mb[src]:
483 elif m2[src] != mb[src]:
478 if not _related(c2[src], base[src]):
484 if not _related(c2[src], base[src]):
479 return
485 return
480 # modified on side 2
486 # modified on side 2
481 for dst in dsts1:
487 for dst in dsts1:
482 if dst not in m2:
488 if dst not in m2:
483 # dst not added on side 2 (handle as regular
489 # dst not added on side 2 (handle as regular
484 # "both created" case in manifestmerge otherwise)
490 # "both created" case in manifestmerge otherwise)
485 copy[dst] = src
491 copy[dst] = src
486
492
487 def _fullcopytracing(repo, c1, c2, base):
493 def _fullcopytracing(repo, c1, c2, base):
488 """ The full copytracing algorithm which finds all the new files that were
494 """ The full copytracing algorithm which finds all the new files that were
489 added from merge base up to the top commit and for each file it checks if
495 added from merge base up to the top commit and for each file it checks if
490 this file was copied from another file.
496 this file was copied from another file.
491
497
492 This is pretty slow when a lot of changesets are involved but will track all
498 This is pretty slow when a lot of changesets are involved but will track all
493 the copies.
499 the copies.
494 """
500 """
495 m1 = c1.manifest()
501 m1 = c1.manifest()
496 m2 = c2.manifest()
502 m2 = c2.manifest()
497 mb = base.manifest()
503 mb = base.manifest()
498
504
499 copies1 = pathcopies(base, c1)
505 copies1 = pathcopies(base, c1)
500 copies2 = pathcopies(base, c2)
506 copies2 = pathcopies(base, c2)
501
507
502 inversecopies1 = {}
508 inversecopies1 = {}
503 inversecopies2 = {}
509 inversecopies2 = {}
504 for dst, src in copies1.items():
510 for dst, src in copies1.items():
505 inversecopies1.setdefault(src, []).append(dst)
511 inversecopies1.setdefault(src, []).append(dst)
506 for dst, src in copies2.items():
512 for dst, src in copies2.items():
507 inversecopies2.setdefault(src, []).append(dst)
513 inversecopies2.setdefault(src, []).append(dst)
508
514
509 copy = {}
515 copy = {}
510 diverge = {}
516 diverge = {}
511 renamedelete = {}
517 renamedelete = {}
512 allsources = set(inversecopies1) | set(inversecopies2)
518 allsources = set(inversecopies1) | set(inversecopies2)
513 for src in allsources:
519 for src in allsources:
514 dsts1 = inversecopies1.get(src)
520 dsts1 = inversecopies1.get(src)
515 dsts2 = inversecopies2.get(src)
521 dsts2 = inversecopies2.get(src)
516 if dsts1 and dsts2:
522 if dsts1 and dsts2:
517 # copied/renamed on both sides
523 # copied/renamed on both sides
518 if src not in m1 and src not in m2:
524 if src not in m1 and src not in m2:
519 # renamed on both sides
525 # renamed on both sides
520 dsts1 = set(dsts1)
526 dsts1 = set(dsts1)
521 dsts2 = set(dsts2)
527 dsts2 = set(dsts2)
522 # If there's some overlap in the rename destinations, we
528 # If there's some overlap in the rename destinations, we
523 # consider it not divergent. For example, if side 1 copies 'a'
529 # consider it not divergent. For example, if side 1 copies 'a'
524 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
530 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
525 # and 'd' and deletes 'a'.
531 # and 'd' and deletes 'a'.
526 if dsts1 & dsts2:
532 if dsts1 & dsts2:
527 for dst in (dsts1 & dsts2):
533 for dst in (dsts1 & dsts2):
528 copy[dst] = src
534 copy[dst] = src
529 else:
535 else:
530 diverge[src] = sorted(dsts1 | dsts2)
536 diverge[src] = sorted(dsts1 | dsts2)
531 elif src in m1 and src in m2:
537 elif src in m1 and src in m2:
532 # copied on both sides
538 # copied on both sides
533 dsts1 = set(dsts1)
539 dsts1 = set(dsts1)
534 dsts2 = set(dsts2)
540 dsts2 = set(dsts2)
535 for dst in (dsts1 & dsts2):
541 for dst in (dsts1 & dsts2):
536 copy[dst] = src
542 copy[dst] = src
537 # TODO: Handle cases where it was renamed on one side and copied
543 # TODO: Handle cases where it was renamed on one side and copied
538 # on the other side
544 # on the other side
539 elif dsts1:
545 elif dsts1:
540 # copied/renamed only on side 1
546 # copied/renamed only on side 1
541 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
547 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
542 copy, renamedelete)
548 copy, renamedelete)
543 elif dsts2:
549 elif dsts2:
544 # copied/renamed only on side 2
550 # copied/renamed only on side 2
545 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
551 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
546 copy, renamedelete)
552 copy, renamedelete)
547
553
548 renamedeleteset = set()
554 renamedeleteset = set()
549 divergeset = set()
555 divergeset = set()
550 for dsts in diverge.values():
556 for dsts in diverge.values():
551 divergeset.update(dsts)
557 divergeset.update(dsts)
552 for dsts in renamedelete.values():
558 for dsts in renamedelete.values():
553 renamedeleteset.update(dsts)
559 renamedeleteset.update(dsts)
554
560
555 # find interesting file sets from manifests
561 # find interesting file sets from manifests
556 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
562 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
557 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
563 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
558 u1 = sorted(addedinm1 - addedinm2)
564 u1 = sorted(addedinm1 - addedinm2)
559 u2 = sorted(addedinm2 - addedinm1)
565 u2 = sorted(addedinm2 - addedinm1)
560
566
561 header = " unmatched files in %s"
567 header = " unmatched files in %s"
562 if u1:
568 if u1:
563 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
569 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
564 if u2:
570 if u2:
565 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
571 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
566
572
567 fullcopy = copies1.copy()
573 fullcopy = copies1.copy()
568 fullcopy.update(copies2)
574 fullcopy.update(copies2)
569 if not fullcopy:
575 if not fullcopy:
570 return copy, {}, diverge, renamedelete, {}
576 return copy, {}, diverge, renamedelete, {}
571
577
572 if repo.ui.debugflag:
578 if repo.ui.debugflag:
573 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
579 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
574 "% = renamed and deleted):\n")
580 "% = renamed and deleted):\n")
575 for f in sorted(fullcopy):
581 for f in sorted(fullcopy):
576 note = ""
582 note = ""
577 if f in copy:
583 if f in copy:
578 note += "*"
584 note += "*"
579 if f in divergeset:
585 if f in divergeset:
580 note += "!"
586 note += "!"
581 if f in renamedeleteset:
587 if f in renamedeleteset:
582 note += "%"
588 note += "%"
583 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
589 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
584 note))
590 note))
585 del divergeset
591 del divergeset
586
592
587 repo.ui.debug(" checking for directory renames\n")
593 repo.ui.debug(" checking for directory renames\n")
588
594
589 # generate a directory move map
595 # generate a directory move map
590 d1, d2 = c1.dirs(), c2.dirs()
596 d1, d2 = c1.dirs(), c2.dirs()
591 invalid = set()
597 invalid = set()
592 dirmove = {}
598 dirmove = {}
593
599
594 # examine each file copy for a potential directory move, which is
600 # examine each file copy for a potential directory move, which is
595 # when all the files in a directory are moved to a new directory
601 # when all the files in a directory are moved to a new directory
596 for dst, src in fullcopy.iteritems():
602 for dst, src in fullcopy.iteritems():
597 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
598 if dsrc in invalid:
604 if dsrc in invalid:
599 # already seen to be uninteresting
605 # already seen to be uninteresting
600 continue
606 continue
601 elif dsrc in d1 and ddst in d1:
607 elif dsrc in d1 and ddst in d1:
602 # directory wasn't entirely moved locally
608 # directory wasn't entirely moved locally
603 invalid.add(dsrc)
609 invalid.add(dsrc)
604 elif dsrc in d2 and ddst in d2:
610 elif dsrc in d2 and ddst in d2:
605 # directory wasn't entirely moved remotely
611 # directory wasn't entirely moved remotely
606 invalid.add(dsrc)
612 invalid.add(dsrc)
607 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
608 # files from the same directory moved to two different places
614 # files from the same directory moved to two different places
609 invalid.add(dsrc)
615 invalid.add(dsrc)
610 else:
616 else:
611 # looks good so far
617 # looks good so far
612 dirmove[dsrc] = ddst
618 dirmove[dsrc] = ddst
613
619
614 for i in invalid:
620 for i in invalid:
615 if i in dirmove:
621 if i in dirmove:
616 del dirmove[i]
622 del dirmove[i]
617 del d1, d2, invalid
623 del d1, d2, invalid
618
624
619 if not dirmove:
625 if not dirmove:
620 return copy, {}, diverge, renamedelete, {}
626 return copy, {}, diverge, renamedelete, {}
621
627
622 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
623
629
624 for d in dirmove:
630 for d in dirmove:
625 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
626 (d, dirmove[d]))
632 (d, dirmove[d]))
627
633
628 movewithdir = {}
634 movewithdir = {}
629 # check unaccounted nonoverlapping files against directory moves
635 # check unaccounted nonoverlapping files against directory moves
630 for f in u1 + u2:
636 for f in u1 + u2:
631 if f not in fullcopy:
637 if f not in fullcopy:
632 for d in dirmove:
638 for d in dirmove:
633 if f.startswith(d):
639 if f.startswith(d):
634 # new file added in a directory that was moved, move it
640 # new file added in a directory that was moved, move it
635 df = dirmove[d] + f[len(d):]
641 df = dirmove[d] + f[len(d):]
636 if df not in copy:
642 if df not in copy:
637 movewithdir[f] = df
643 movewithdir[f] = df
638 repo.ui.debug((" pending file src: '%s' -> "
644 repo.ui.debug((" pending file src: '%s' -> "
639 "dst: '%s'\n") % (f, df))
645 "dst: '%s'\n") % (f, df))
640 break
646 break
641
647
642 return copy, movewithdir, diverge, renamedelete, dirmove
648 return copy, movewithdir, diverge, renamedelete, dirmove
643
649
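# Editor's illustration (not part of copies.py): the overlap rule above, on plain
# sets.  Side 1 renamed 'a' to 'b' and 'c'; side 2 renamed 'a' to 'c' and 'd'.
dsts1, dsts2 = {'b', 'c'}, {'c', 'd'}
copy, diverge = {}, {}
if dsts1 & dsts2:
    for dst in dsts1 & dsts2:
        copy[dst] = 'a'                 # overlapping destination: treat as a copy
else:
    diverge['a'] = sorted(dsts1 | dsts2)
assert copy == {'c': 'a'} and diverge == {}
# With non-overlapping destinations (say {'b'} and {'d'}), 'a' would instead be
# reported as divergent: diverge == {'a': ['b', 'd']}.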
644 def _heuristicscopytracing(repo, c1, c2, base):
650 def _heuristicscopytracing(repo, c1, c2, base):
645 """ Fast copytracing using filename heuristics
651 """ Fast copytracing using filename heuristics
646
652
647 Assumes that moves or renames are of the following two types:
653 Assumes that moves or renames are of the following two types:
648
654
649 1) Inside a directory only (same directory name but different filenames)
655 1) Inside a directory only (same directory name but different filenames)
650 2) Move from one directory to another
656 2) Move from one directory to another
651 (same filenames but different directory names)
657 (same filenames but different directory names)
652
658
653 Works only when there are no merge commits in the "source branch".
659 Works only when there are no merge commits in the "source branch".
654 Source branch is commits from base up to c2 not including base.
660 Source branch is commits from base up to c2 not including base.
655
661
656 If a merge is involved it falls back to _fullcopytracing().
662 If a merge is involved it falls back to _fullcopytracing().
657
663
658 Can be used by setting the following config:
664 Can be used by setting the following config:
659
665
660 [experimental]
666 [experimental]
661 copytrace = heuristics
667 copytrace = heuristics
662
668
663 In some cases the copy/move candidates found by heuristics can be very large
669 In some cases the copy/move candidates found by heuristics can be very large
664 in number and that will make the algorithm slow. The number of possible
670 in number and that will make the algorithm slow. The number of possible
665 candidates to check can be limited by using the config
671 candidates to check can be limited by using the config
666 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
667 """
673 """
668
674
669 if c1.rev() is None:
675 if c1.rev() is None:
670 c1 = c1.p1()
676 c1 = c1.p1()
671 if c2.rev() is None:
677 if c2.rev() is None:
672 c2 = c2.p1()
678 c2 = c2.p1()
673
679
674 copies = {}
680 copies = {}
675
681
676 changedfiles = set()
682 changedfiles = set()
677 m1 = c1.manifest()
683 m1 = c1.manifest()
678 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
679 # If base is not in c2 branch, we switch to fullcopytracing
685 # If base is not in c2 branch, we switch to fullcopytracing
680 repo.ui.debug("switching to full copytracing as base is not "
686 repo.ui.debug("switching to full copytracing as base is not "
681 "an ancestor of c2\n")
687 "an ancestor of c2\n")
682 return _fullcopytracing(repo, c1, c2, base)
688 return _fullcopytracing(repo, c1, c2, base)
683
689
684 ctx = c2
690 ctx = c2
685 while ctx != base:
691 while ctx != base:
686 if len(ctx.parents()) == 2:
692 if len(ctx.parents()) == 2:
687 # To keep things simple let's not handle merges
693 # To keep things simple let's not handle merges
688 repo.ui.debug("switching to full copytracing because of merges\n")
694 repo.ui.debug("switching to full copytracing because of merges\n")
689 return _fullcopytracing(repo, c1, c2, base)
695 return _fullcopytracing(repo, c1, c2, base)
690 changedfiles.update(ctx.files())
696 changedfiles.update(ctx.files())
691 ctx = ctx.p1()
697 ctx = ctx.p1()
692
698
693 cp = _forwardcopies(base, c2)
699 cp = _forwardcopies(base, c2)
694 for dst, src in cp.iteritems():
700 for dst, src in cp.iteritems():
695 if src in m1:
701 if src in m1:
696 copies[dst] = src
702 copies[dst] = src
697
703
698 # file is missing if it isn't present in the destination, but is present in
704 # file is missing if it isn't present in the destination, but is present in
699 # the base and present in the source.
705 # the base and present in the source.
700 # Presence in the base is important to exclude added files, presence in the
706 # Presence in the base is important to exclude added files, presence in the
701 # source is important to exclude removed files.
707 # source is important to exclude removed files.
702 filt = lambda f: f not in m1 and f in base and f in c2
708 filt = lambda f: f not in m1 and f in base and f in c2
703 missingfiles = [f for f in changedfiles if filt(f)]
709 missingfiles = [f for f in changedfiles if filt(f)]
704
710
705 if missingfiles:
711 if missingfiles:
706 basenametofilename = collections.defaultdict(list)
712 basenametofilename = collections.defaultdict(list)
707 dirnametofilename = collections.defaultdict(list)
713 dirnametofilename = collections.defaultdict(list)
708
714
709 for f in m1.filesnotin(base.manifest()):
715 for f in m1.filesnotin(base.manifest()):
710 basename = os.path.basename(f)
716 basename = os.path.basename(f)
711 dirname = os.path.dirname(f)
717 dirname = os.path.dirname(f)
712 basenametofilename[basename].append(f)
718 basenametofilename[basename].append(f)
713 dirnametofilename[dirname].append(f)
719 dirnametofilename[dirname].append(f)
714
720
715 for f in missingfiles:
721 for f in missingfiles:
716 basename = os.path.basename(f)
722 basename = os.path.basename(f)
717 dirname = os.path.dirname(f)
723 dirname = os.path.dirname(f)
718 samebasename = basenametofilename[basename]
724 samebasename = basenametofilename[basename]
719 samedirname = dirnametofilename[dirname]
725 samedirname = dirnametofilename[dirname]
720 movecandidates = samebasename + samedirname
726 movecandidates = samebasename + samedirname
721 # f is guaranteed to be present in c2, that's why
727 # f is guaranteed to be present in c2, that's why
722 # c2.filectx(f) won't fail
728 # c2.filectx(f) won't fail
723 f2 = c2.filectx(f)
729 f2 = c2.filectx(f)
724 # we can have a lot of candidates which can slow down the heuristics
730 # we can have a lot of candidates which can slow down the heuristics
725 # config value to limit the number of candidates moves to check
731 # config value to limit the number of candidates moves to check
726 maxcandidates = repo.ui.configint('experimental',
732 maxcandidates = repo.ui.configint('experimental',
727 'copytrace.movecandidateslimit')
733 'copytrace.movecandidateslimit')
728
734
729 if len(movecandidates) > maxcandidates:
735 if len(movecandidates) > maxcandidates:
730 repo.ui.status(_("skipping copytracing for '%s', more "
736 repo.ui.status(_("skipping copytracing for '%s', more "
731 "candidates than the limit: %d\n")
737 "candidates than the limit: %d\n")
732 % (f, len(movecandidates)))
738 % (f, len(movecandidates)))
733 continue
739 continue
734
740
735 for candidate in movecandidates:
741 for candidate in movecandidates:
736 f1 = c1.filectx(candidate)
742 f1 = c1.filectx(candidate)
737 if _related(f1, f2):
743 if _related(f1, f2):
738 # if there are a few related copies then we'll merge
744 # if there are a few related copies then we'll merge
739 # changes into all of them. This matches the behaviour
745 # changes into all of them. This matches the behaviour
740 # of upstream copytracing
746 # of upstream copytracing
741 copies[candidate] = f
747 copies[candidate] = f
742
748
743 return copies, {}, {}, {}, {}
749 return copies, {}, {}, {}, {}
744
750
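# Editor's illustration (not part of copies.py): the basename/dirname bucketing
# above, on plain path strings.  'addedlocally' stands in for m1.filesnotin(base).
import collections
import os

addedlocally = ['new/dir/a.txt', 'other/b.txt']
basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in addedlocally:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

missing = 'old/dir/a.txt'               # present in base and in c2, gone locally
candidates = (basenametofilename[os.path.basename(missing)] +
              dirnametofilename[os.path.dirname(missing)])
assert candidates == ['new/dir/a.txt']  # each candidate is then checked with _related()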
745 def _related(f1, f2):
751 def _related(f1, f2):
746 """return True if f1 and f2 filectx have a common ancestor
752 """return True if f1 and f2 filectx have a common ancestor
747
753
748 Walk back to common ancestor to see if the two files originate
754 Walk back to common ancestor to see if the two files originate
749 from the same file. Since workingfilectx's rev() is None it messes
755 from the same file. Since workingfilectx's rev() is None it messes
750 up the integer comparison logic, hence the pre-step check for
756 up the integer comparison logic, hence the pre-step check for
751 None (f1 and f2 can only be workingfilectx's initially).
757 None (f1 and f2 can only be workingfilectx's initially).
752 """
758 """
753
759
754 if f1 == f2:
760 if f1 == f2:
755 return True # a match
761 return True # a match
756
762
757 g1, g2 = f1.ancestors(), f2.ancestors()
763 g1, g2 = f1.ancestors(), f2.ancestors()
758 try:
764 try:
759 f1r, f2r = f1.linkrev(), f2.linkrev()
765 f1r, f2r = f1.linkrev(), f2.linkrev()
760
766
761 if f1r is None:
767 if f1r is None:
762 f1 = next(g1)
768 f1 = next(g1)
763 if f2r is None:
769 if f2r is None:
764 f2 = next(g2)
770 f2 = next(g2)
765
771
766 while True:
772 while True:
767 f1r, f2r = f1.linkrev(), f2.linkrev()
773 f1r, f2r = f1.linkrev(), f2.linkrev()
768 if f1r > f2r:
774 if f1r > f2r:
769 f1 = next(g1)
775 f1 = next(g1)
770 elif f2r > f1r:
776 elif f2r > f1r:
771 f2 = next(g2)
777 f2 = next(g2)
772 else: # f1 and f2 point to files in the same linkrev
778 else: # f1 and f2 point to files in the same linkrev
773 return f1 == f2 # true if they point to the same file
779 return f1 == f2 # true if they point to the same file
774 except StopIteration:
780 except StopIteration:
775 return False
781 return False
776
782
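# Editor's illustration (not part of copies.py): the two-generator walk above,
# with (linkrev, introducing filenode) tuples standing in for filectx ancestors,
# yielded newest-first.
def related_sketch(ancestors1, ancestors2):
    g1, g2 = iter(ancestors1), iter(ancestors2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)
            elif f2[0] > f1[0]:
                f2 = next(g2)
            else:
                return f1 == f2     # same linkrev: related iff it is the same entry
    except StopIteration:
        return False                # one side ran out of ancestors first

assert related_sketch([(5, 'x'), (3, 'common')], [(4, 'y'), (3, 'common')])
assert not related_sketch([(5, 'x'), (2, 'p')], [(4, 'y'), (1, 'q')])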
777 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
778 """reproduce copies from fromrev to rev in the dirstate
784 """reproduce copies from fromrev to rev in the dirstate
779
785
780 If skiprev is specified, it's a revision that should be used to
786 If skiprev is specified, it's a revision that should be used to
781 filter copy records. Any copies that occur between fromrev and
787 filter copy records. Any copies that occur between fromrev and
782 skiprev will not be duplicated, even if they appear in the set of
788 skiprev will not be duplicated, even if they appear in the set of
783 copies between fromrev and rev.
789 copies between fromrev and rev.
784 """
790 """
785 exclude = {}
791 exclude = {}
786 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
787 bctrace = stringutil.parsebool(ctraceconfig)
793 bctrace = stringutil.parsebool(ctraceconfig)
788 if (skiprev is not None and
794 if (skiprev is not None and
789 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
790 # copytrace='off' skips this line, but not the entire function because
796 # copytrace='off' skips this line, but not the entire function because
791 # the line below is O(size of the repo) during a rebase, while the rest
797 # the line below is O(size of the repo) during a rebase, while the rest
792 # of the function is much faster (and is required for carrying copy
798 # of the function is much faster (and is required for carrying copy
793 # metadata across the rebase anyway).
799 # metadata across the rebase anyway).
794 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 exclude = pathcopies(repo[fromrev], repo[skiprev])
795 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
796 if dst in exclude:
802 if dst in exclude:
797 continue
803 continue
798 if dst in wctx:
804 if dst in wctx:
799 wctx[dst].markcopied(src)
805 wctx[dst].markcopied(src)