##// END OF EJS Templates
copies: delete misplaced comment...
Martin von Zweigbergk -
r42445:d1c7446d default
parent child Browse files
Show More
@@ -1,803 +1,801 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendent
31 ancestor of a or b but not both, except when a or b is a direct descendent
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendent of b or visa-versa.
107 # This only occurs when a is a descendent of b or visa-versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
110 def _chain(src, dst, a, b):
110 def _chain(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b'"""
111 """chain two sets of copies 'a' and 'b'"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126
126
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 # overwritten later, and case 3 will be removed later.
129 # overwritten later, and case 3 will be removed later.
130 t = a.copy()
130 t = a.copy()
131 for k, v in b.iteritems():
131 for k, v in b.iteritems():
132 if v in t:
132 if v in t:
133 # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
133 # Found a chain, i.e. cases 3 & 4. We'll remove case 3 later.
134 t[k] = t[v]
134 t[k] = t[v]
135 else:
135 else:
136 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
136 # Renamed only in 'b', i.e. cases 5 & 6. We'll remove case 5 later.
137 t[k] = v
137 t[k] = v
138
138
139 for k, v in list(t.items()):
139 for k, v in list(t.items()):
140 # remove copies from files that didn't exist, i.e. case 5
140 # remove copies from files that didn't exist, i.e. case 5
141 if v not in src:
141 if v not in src:
142 del t[k]
142 del t[k]
143 # remove criss-crossed copies, i.e. case 3
143 # remove criss-crossed copies, i.e. case 3
144 elif k in src and v in dst:
144 elif k in src and v in dst:
145 del t[k]
145 del t[k]
146 # remove copies to files that were then removed, i.e. case 1
146 # remove copies to files that were then removed, i.e. case 1
147 # and file 'y' in cases 3 & 4 (in case of rename)
147 # and file 'y' in cases 3 & 4 (in case of rename)
148 elif k not in dst:
148 elif k not in dst:
149 del t[k]
149 del t[k]
150
150
151 return t
151 return t
152
152
153 def _tracefile(fctx, am, limit):
153 def _tracefile(fctx, am, limit):
154 """return file context that is the ancestor of fctx present in ancestor
154 """return file context that is the ancestor of fctx present in ancestor
155 manifest am, stopping after the first ancestor lower than limit"""
155 manifest am, stopping after the first ancestor lower than limit"""
156
156
157 for f in fctx.ancestors():
157 for f in fctx.ancestors():
158 if am.get(f.path(), None) == f.filenode():
158 if am.get(f.path(), None) == f.filenode():
159 return f
159 return f
160 if not f.isintroducedafter(limit):
160 if not f.isintroducedafter(limit):
161 return None
161 return None
162
162
163 def _dirstatecopies(repo, match=None):
163 def _dirstatecopies(repo, match=None):
164 ds = repo.dirstate
164 ds = repo.dirstate
165 c = ds.copies().copy()
165 c = ds.copies().copy()
166 for k in list(c):
166 for k in list(c):
167 if ds[k] not in 'anm' or (match and not match(k)):
167 if ds[k] not in 'anm' or (match and not match(k)):
168 del c[k]
168 del c[k]
169 return c
169 return c
170
170
171 def _computeforwardmissing(a, b, match=None):
171 def _computeforwardmissing(a, b, match=None):
172 """Computes which files are in b but not a.
172 """Computes which files are in b but not a.
173 This is its own function so extensions can easily wrap this call to see what
173 This is its own function so extensions can easily wrap this call to see what
174 files _forwardcopies is about to process.
174 files _forwardcopies is about to process.
175 """
175 """
176 ma = a.manifest()
176 ma = a.manifest()
177 mb = b.manifest()
177 mb = b.manifest()
178 return mb.filesnotin(ma, match=match)
178 return mb.filesnotin(ma, match=match)
179
179
180 def usechangesetcentricalgo(repo):
180 def usechangesetcentricalgo(repo):
181 """Checks if we should use changeset-centric copy algorithms"""
181 """Checks if we should use changeset-centric copy algorithms"""
182 return (repo.ui.config('experimental', 'copies.read-from') in
182 return (repo.ui.config('experimental', 'copies.read-from') in
183 ('changeset-only', 'compatibility'))
183 ('changeset-only', 'compatibility'))
184
184
185 def _committedforwardcopies(a, b, match):
185 def _committedforwardcopies(a, b, match):
186 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
186 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
187 # files might have to be traced back to the fctx parent of the last
187 # files might have to be traced back to the fctx parent of the last
188 # one-side-only changeset, but not further back than that
188 # one-side-only changeset, but not further back than that
189 repo = a._repo
189 repo = a._repo
190
190
191 if usechangesetcentricalgo(repo):
191 if usechangesetcentricalgo(repo):
192 return _changesetforwardcopies(a, b, match)
192 return _changesetforwardcopies(a, b, match)
193
193
194 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
194 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
195 dbg = repo.ui.debug
195 dbg = repo.ui.debug
196 if debug:
196 if debug:
197 dbg('debug.copies: looking into rename from %s to %s\n'
197 dbg('debug.copies: looking into rename from %s to %s\n'
198 % (a, b))
198 % (a, b))
199 limit = _findlimit(repo, a, b)
199 limit = _findlimit(repo, a, b)
200 if debug:
200 if debug:
201 dbg('debug.copies: search limit: %d\n' % limit)
201 dbg('debug.copies: search limit: %d\n' % limit)
202 am = a.manifest()
202 am = a.manifest()
203
203
204 # find where new files came from
204 # find where new files came from
205 # we currently don't try to find where old files went, too expensive
205 # we currently don't try to find where old files went, too expensive
206 # this means we can miss a case like 'hg rm b; hg cp a b'
206 # this means we can miss a case like 'hg rm b; hg cp a b'
207 cm = {}
207 cm = {}
208
208
209 # Computing the forward missing is quite expensive on large manifests, since
209 # Computing the forward missing is quite expensive on large manifests, since
210 # it compares the entire manifests. We can optimize it in the common use
210 # it compares the entire manifests. We can optimize it in the common use
211 # case of computing what copies are in a commit versus its parent (like
211 # case of computing what copies are in a commit versus its parent (like
212 # during a rebase or histedit). Note, we exclude merge commits from this
212 # during a rebase or histedit). Note, we exclude merge commits from this
213 # optimization, since the ctx.files() for a merge commit is not correct for
213 # optimization, since the ctx.files() for a merge commit is not correct for
214 # this comparison.
214 # this comparison.
215 forwardmissingmatch = match
215 forwardmissingmatch = match
216 if b.p1() == a and b.p2().node() == node.nullid:
216 if b.p1() == a and b.p2().node() == node.nullid:
217 filesmatcher = matchmod.exact(b.files())
217 filesmatcher = matchmod.exact(b.files())
218 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
218 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
219 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
219 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
220
220
221 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
221 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
222
222
223 if debug:
223 if debug:
224 dbg('debug.copies: missing files to search: %d\n' % len(missing))
224 dbg('debug.copies: missing files to search: %d\n' % len(missing))
225
225
226 for f in sorted(missing):
226 for f in sorted(missing):
227 if debug:
227 if debug:
228 dbg('debug.copies: tracing file: %s\n' % f)
228 dbg('debug.copies: tracing file: %s\n' % f)
229 fctx = b[f]
229 fctx = b[f]
230 fctx._ancestrycontext = ancestrycontext
230 fctx._ancestrycontext = ancestrycontext
231
231
232 if debug:
232 if debug:
233 start = util.timer()
233 start = util.timer()
234 ofctx = _tracefile(fctx, am, limit)
234 ofctx = _tracefile(fctx, am, limit)
235 if ofctx:
235 if ofctx:
236 if debug:
236 if debug:
237 dbg('debug.copies: rename of: %s\n' % ofctx._path)
237 dbg('debug.copies: rename of: %s\n' % ofctx._path)
238 cm[f] = ofctx.path()
238 cm[f] = ofctx.path()
239 if debug:
239 if debug:
240 dbg('debug.copies: time: %f seconds\n'
240 dbg('debug.copies: time: %f seconds\n'
241 % (util.timer() - start))
241 % (util.timer() - start))
242 return cm
242 return cm
243
243
244 def _changesetforwardcopies(a, b, match):
244 def _changesetforwardcopies(a, b, match):
245 if a.rev() == node.nullrev:
245 if a.rev() == node.nullrev:
246 return {}
246 return {}
247
247
248 repo = a.repo()
248 repo = a.repo()
249 children = {}
249 children = {}
250 cl = repo.changelog
250 cl = repo.changelog
251 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
251 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
252 for r in missingrevs:
252 for r in missingrevs:
253 for p in cl.parentrevs(r):
253 for p in cl.parentrevs(r):
254 if p == node.nullrev:
254 if p == node.nullrev:
255 continue
255 continue
256 if p not in children:
256 if p not in children:
257 children[p] = [r]
257 children[p] = [r]
258 else:
258 else:
259 children[p].append(r)
259 children[p].append(r)
260
260
261 roots = set(children) - set(missingrevs)
261 roots = set(children) - set(missingrevs)
262 # 'work' contains 3-tuples of a (revision number, parent number, copies).
262 # 'work' contains 3-tuples of a (revision number, parent number, copies).
263 # The parent number is only used for knowing which parent the copies dict
263 # The parent number is only used for knowing which parent the copies dict
264 # came from.
264 # came from.
265 work = [(r, 1, {}) for r in roots]
265 work = [(r, 1, {}) for r in roots]
266 heapq.heapify(work)
266 heapq.heapify(work)
267 while work:
267 while work:
268 r, i1, copies1 = heapq.heappop(work)
268 r, i1, copies1 = heapq.heappop(work)
269 if work and work[0][0] == r:
269 if work and work[0][0] == r:
270 # We are tracing copies from both parents
270 # We are tracing copies from both parents
271 r, i2, copies2 = heapq.heappop(work)
271 r, i2, copies2 = heapq.heappop(work)
272 copies = {}
272 copies = {}
273 ctx = repo[r]
273 ctx = repo[r]
274 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
274 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
275 allcopies = set(copies1) | set(copies2)
275 allcopies = set(copies1) | set(copies2)
276 # TODO: perhaps this filtering should be done as long as ctx
276 # TODO: perhaps this filtering should be done as long as ctx
277 # is merge, whether or not we're tracing from both parent.
277 # is merge, whether or not we're tracing from both parent.
278 for dst in allcopies:
278 for dst in allcopies:
279 if not match(dst):
279 if not match(dst):
280 continue
280 continue
281 if dst not in copies2:
281 if dst not in copies2:
282 # Copied on p1 side: mark as copy from p1 side if it didn't
282 # Copied on p1 side: mark as copy from p1 side if it didn't
283 # already exist on p2 side
283 # already exist on p2 side
284 if dst not in p2man:
284 if dst not in p2man:
285 copies[dst] = copies1[dst]
285 copies[dst] = copies1[dst]
286 elif dst not in copies1:
286 elif dst not in copies1:
287 # Copied on p2 side: mark as copy from p2 side if it didn't
287 # Copied on p2 side: mark as copy from p2 side if it didn't
288 # already exist on p1 side
288 # already exist on p1 side
289 if dst not in p1man:
289 if dst not in p1man:
290 copies[dst] = copies2[dst]
290 copies[dst] = copies2[dst]
291 else:
291 else:
292 # Copied on both sides: mark as copy from p1 side
292 # Copied on both sides: mark as copy from p1 side
293 copies[dst] = copies1[dst]
293 copies[dst] = copies1[dst]
294 else:
294 else:
295 copies = copies1
295 copies = copies1
296 if r == b.rev():
296 if r == b.rev():
297 return copies
297 return copies
298 for c in children[r]:
298 for c in children[r]:
299 childctx = repo[c]
299 childctx = repo[c]
300 if r == childctx.p1().rev():
300 if r == childctx.p1().rev():
301 parent = 1
301 parent = 1
302 childcopies = childctx.p1copies()
302 childcopies = childctx.p1copies()
303 else:
303 else:
304 assert r == childctx.p2().rev()
304 assert r == childctx.p2().rev()
305 parent = 2
305 parent = 2
306 childcopies = childctx.p2copies()
306 childcopies = childctx.p2copies()
307 if not match.always():
307 if not match.always():
308 childcopies = {dst: src for dst, src in childcopies.items()
308 childcopies = {dst: src for dst, src in childcopies.items()
309 if match(dst)}
309 if match(dst)}
310 childcopies = _chain(a, childctx, copies, childcopies)
310 childcopies = _chain(a, childctx, copies, childcopies)
311 heapq.heappush(work, (c, parent, childcopies))
311 heapq.heappush(work, (c, parent, childcopies))
312 assert False
312 assert False
313
313
314 def _forwardcopies(a, b, match=None):
314 def _forwardcopies(a, b, match=None):
315 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
315 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
316
316
317 match = a.repo().narrowmatch(match)
317 match = a.repo().narrowmatch(match)
318 # check for working copy
318 # check for working copy
319 if b.rev() is None:
319 if b.rev() is None:
320 if a == b.p1():
320 if a == b.p1():
321 # short-circuit to avoid issues with merge states
321 # short-circuit to avoid issues with merge states
322 return _dirstatecopies(b._repo, match)
322 return _dirstatecopies(b._repo, match)
323
323
324 cm = _committedforwardcopies(a, b.p1(), match)
324 cm = _committedforwardcopies(a, b.p1(), match)
325 # combine copies from dirstate if necessary
325 # combine copies from dirstate if necessary
326 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
326 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
327 return _committedforwardcopies(a, b, match)
327 return _committedforwardcopies(a, b, match)
328
328
329 def _backwardrenames(a, b, match):
329 def _backwardrenames(a, b, match):
330 if a._repo.ui.config('experimental', 'copytrace') == 'off':
330 if a._repo.ui.config('experimental', 'copytrace') == 'off':
331 return {}
331 return {}
332
332
333 # Even though we're not taking copies into account, 1:n rename situations
333 # Even though we're not taking copies into account, 1:n rename situations
334 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
334 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
335 # arbitrarily pick one of the renames.
335 # arbitrarily pick one of the renames.
336 # We don't want to pass in "match" here, since that would filter
336 # We don't want to pass in "match" here, since that would filter
337 # the destination by it. Since we're reversing the copies, we want
337 # the destination by it. Since we're reversing the copies, we want
338 # to filter the source instead.
338 # to filter the source instead.
339 f = _forwardcopies(b, a)
339 f = _forwardcopies(b, a)
340 r = {}
340 r = {}
341 for k, v in sorted(f.iteritems()):
341 for k, v in sorted(f.iteritems()):
342 if match and not match(v):
342 if match and not match(v):
343 continue
343 continue
344 # remove copies
344 # remove copies
345 if v in a:
345 if v in a:
346 continue
346 continue
347 r[v] = k
347 r[v] = k
348 return r
348 return r
349
349
350 def pathcopies(x, y, match=None):
350 def pathcopies(x, y, match=None):
351 """find {dst@y: src@x} copy mapping for directed compare"""
351 """find {dst@y: src@x} copy mapping for directed compare"""
352 repo = x._repo
352 repo = x._repo
353 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
353 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
354 if debug:
354 if debug:
355 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
355 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
356 % (x, y))
356 % (x, y))
357 if x == y or not x or not y:
357 if x == y or not x or not y:
358 return {}
358 return {}
359 a = y.ancestor(x)
359 a = y.ancestor(x)
360 if a == x:
360 if a == x:
361 if debug:
361 if debug:
362 repo.ui.debug('debug.copies: search mode: forward\n')
362 repo.ui.debug('debug.copies: search mode: forward\n')
363 return _forwardcopies(x, y, match=match)
363 return _forwardcopies(x, y, match=match)
364 if a == y:
364 if a == y:
365 if debug:
365 if debug:
366 repo.ui.debug('debug.copies: search mode: backward\n')
366 repo.ui.debug('debug.copies: search mode: backward\n')
367 return _backwardrenames(x, y, match=match)
367 return _backwardrenames(x, y, match=match)
368 if debug:
368 if debug:
369 repo.ui.debug('debug.copies: search mode: combined\n')
369 repo.ui.debug('debug.copies: search mode: combined\n')
370 return _chain(x, y, _backwardrenames(x, a, match=match),
370 return _chain(x, y, _backwardrenames(x, a, match=match),
371 _forwardcopies(a, y, match=match))
371 _forwardcopies(a, y, match=match))
372
372
373 def mergecopies(repo, c1, c2, base):
373 def mergecopies(repo, c1, c2, base):
374 """
374 """
375 Finds moves and copies between context c1 and c2 that are relevant for
375 Finds moves and copies between context c1 and c2 that are relevant for
376 merging. 'base' will be used as the merge base.
376 merging. 'base' will be used as the merge base.
377
377
378 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
378 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
379 files that were moved/ copied in one merge parent and modified in another.
379 files that were moved/ copied in one merge parent and modified in another.
380 For example:
380 For example:
381
381
382 o ---> 4 another commit
382 o ---> 4 another commit
383 |
383 |
384 | o ---> 3 commit that modifies a.txt
384 | o ---> 3 commit that modifies a.txt
385 | /
385 | /
386 o / ---> 2 commit that moves a.txt to b.txt
386 o / ---> 2 commit that moves a.txt to b.txt
387 |/
387 |/
388 o ---> 1 merge base
388 o ---> 1 merge base
389
389
390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
390 If we try to rebase revision 3 on revision 4, since there is no a.txt in
391 revision 4, and if user have copytrace disabled, we prints the following
391 revision 4, and if user have copytrace disabled, we prints the following
392 message:
392 message:
393
393
394 ```other changed <file> which local deleted```
394 ```other changed <file> which local deleted```
395
395
396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
396 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
397 "dirmove".
397 "dirmove".
398
398
399 "copy" is a mapping from destination name -> source name,
399 "copy" is a mapping from destination name -> source name,
400 where source is in c1 and destination is in c2 or vice-versa.
400 where source is in c1 and destination is in c2 or vice-versa.
401
401
402 "movewithdir" is a mapping from source name -> destination name,
402 "movewithdir" is a mapping from source name -> destination name,
403 where the file at source present in one context but not the other
403 where the file at source present in one context but not the other
404 needs to be moved to destination by the merge process, because the
404 needs to be moved to destination by the merge process, because the
405 other context moved the directory it is in.
405 other context moved the directory it is in.
406
406
407 "diverge" is a mapping of source name -> list of destination names
407 "diverge" is a mapping of source name -> list of destination names
408 for divergent renames.
408 for divergent renames.
409
409
410 "renamedelete" is a mapping of source name -> list of destination
410 "renamedelete" is a mapping of source name -> list of destination
411 names for files deleted in c1 that were renamed in c2 or vice-versa.
411 names for files deleted in c1 that were renamed in c2 or vice-versa.
412
412
413 "dirmove" is a mapping of detected source dir -> destination dir renames.
413 "dirmove" is a mapping of detected source dir -> destination dir renames.
414 This is needed for handling changes to new files previously grafted into
414 This is needed for handling changes to new files previously grafted into
415 renamed directories.
415 renamed directories.
416
416
417 This function calls different copytracing algorithms based on config.
417 This function calls different copytracing algorithms based on config.
418 """
418 """
419 # avoid silly behavior for update from empty dir
419 # avoid silly behavior for update from empty dir
420 if not c1 or not c2 or c1 == c2:
420 if not c1 or not c2 or c1 == c2:
421 return {}, {}, {}, {}, {}
421 return {}, {}, {}, {}, {}
422
422
423 narrowmatch = c1.repo().narrowmatch()
423 narrowmatch = c1.repo().narrowmatch()
424
424
425 # avoid silly behavior for parent -> working dir
425 # avoid silly behavior for parent -> working dir
426 if c2.node() is None and c1.node() == repo.dirstate.p1():
426 if c2.node() is None and c1.node() == repo.dirstate.p1():
427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
427 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
428
428
429 copytracing = repo.ui.config('experimental', 'copytrace')
429 copytracing = repo.ui.config('experimental', 'copytrace')
430 if stringutil.parsebool(copytracing) is False:
430 if stringutil.parsebool(copytracing) is False:
431 # stringutil.parsebool() returns None when it is unable to parse the
431 # stringutil.parsebool() returns None when it is unable to parse the
432 # value, so we should rely on making sure copytracing is on such cases
432 # value, so we should rely on making sure copytracing is on such cases
433 return {}, {}, {}, {}, {}
433 return {}, {}, {}, {}, {}
434
434
435 if usechangesetcentricalgo(repo):
435 if usechangesetcentricalgo(repo):
436 # The heuristics don't make sense when we need changeset-centric algos
436 # The heuristics don't make sense when we need changeset-centric algos
437 return _fullcopytracing(repo, c1, c2, base)
437 return _fullcopytracing(repo, c1, c2, base)
438
438
439 # Copy trace disabling is explicitly below the node == p1 logic above
439 # Copy trace disabling is explicitly below the node == p1 logic above
440 # because the logic above is required for a simple copy to be kept across a
440 # because the logic above is required for a simple copy to be kept across a
441 # rebase.
441 # rebase.
442 if copytracing == 'heuristics':
442 if copytracing == 'heuristics':
443 # Do full copytracing if only non-public revisions are involved as
443 # Do full copytracing if only non-public revisions are involved as
444 # that will be fast enough and will also cover the copies which could
444 # that will be fast enough and will also cover the copies which could
445 # be missed by heuristics
445 # be missed by heuristics
446 if _isfullcopytraceable(repo, c1, base):
446 if _isfullcopytraceable(repo, c1, base):
447 return _fullcopytracing(repo, c1, c2, base)
447 return _fullcopytracing(repo, c1, c2, base)
448 return _heuristicscopytracing(repo, c1, c2, base)
448 return _heuristicscopytracing(repo, c1, c2, base)
449 else:
449 else:
450 return _fullcopytracing(repo, c1, c2, base)
450 return _fullcopytracing(repo, c1, c2, base)
451
451
452 def _isfullcopytraceable(repo, c1, base):
452 def _isfullcopytraceable(repo, c1, base):
453 """ Checks that if base, source and destination are all no-public branches,
453 """ Checks that if base, source and destination are all no-public branches,
454 if yes let's use the full copytrace algorithm for increased capabilities
454 if yes let's use the full copytrace algorithm for increased capabilities
455 since it will be fast enough.
455 since it will be fast enough.
456
456
457 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
457 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
458 number of changesets from c1 to base such that if number of changesets are
458 number of changesets from c1 to base such that if number of changesets are
459 more than the limit, full copytracing algorithm won't be used.
459 more than the limit, full copytracing algorithm won't be used.
460 """
460 """
461 if c1.rev() is None:
461 if c1.rev() is None:
462 c1 = c1.p1()
462 c1 = c1.p1()
463 if c1.mutable() and base.mutable():
463 if c1.mutable() and base.mutable():
464 sourcecommitlimit = repo.ui.configint('experimental',
464 sourcecommitlimit = repo.ui.configint('experimental',
465 'copytrace.sourcecommitlimit')
465 'copytrace.sourcecommitlimit')
466 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
466 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
467 return commits < sourcecommitlimit
467 return commits < sourcecommitlimit
468 return False
468 return False
469
469
470 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
470 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
471 copy, renamedelete):
471 copy, renamedelete):
472 if src not in m2:
472 if src not in m2:
473 # deleted on side 2
473 # deleted on side 2
474 if src not in m1:
474 if src not in m1:
475 # renamed on side 1, deleted on side 2
475 # renamed on side 1, deleted on side 2
476 renamedelete[src] = dsts1
476 renamedelete[src] = dsts1
477 elif m2[src] != mb[src]:
477 elif m2[src] != mb[src]:
478 if not _related(c2[src], base[src]):
478 if not _related(c2[src], base[src]):
479 return
479 return
480 # modified on side 2
480 # modified on side 2
481 for dst in dsts1:
481 for dst in dsts1:
482 if dst not in m2:
482 if dst not in m2:
483 # dst not added on side 2 (handle as regular
483 # dst not added on side 2 (handle as regular
484 # "both created" case in manifestmerge otherwise)
484 # "both created" case in manifestmerge otherwise)
485 copy[dst] = src
485 copy[dst] = src
486
486
487 def _fullcopytracing(repo, c1, c2, base):
487 def _fullcopytracing(repo, c1, c2, base):
488 """ The full copytracing algorithm which finds all the new files that were
488 """ The full copytracing algorithm which finds all the new files that were
489 added from merge base up to the top commit and for each file it checks if
489 added from merge base up to the top commit and for each file it checks if
490 this file was copied from another file.
490 this file was copied from another file.
491
491
492 This is pretty slow when a lot of changesets are involved but will track all
492 This is pretty slow when a lot of changesets are involved but will track all
493 the copies.
493 the copies.
494 """
494 """
495 m1 = c1.manifest()
495 m1 = c1.manifest()
496 m2 = c2.manifest()
496 m2 = c2.manifest()
497 mb = base.manifest()
497 mb = base.manifest()
498
498
499 copies1 = pathcopies(base, c1)
499 copies1 = pathcopies(base, c1)
500 copies2 = pathcopies(base, c2)
500 copies2 = pathcopies(base, c2)
501
501
502 inversecopies1 = {}
502 inversecopies1 = {}
503 inversecopies2 = {}
503 inversecopies2 = {}
504 for dst, src in copies1.items():
504 for dst, src in copies1.items():
505 inversecopies1.setdefault(src, []).append(dst)
505 inversecopies1.setdefault(src, []).append(dst)
506 for dst, src in copies2.items():
506 for dst, src in copies2.items():
507 inversecopies2.setdefault(src, []).append(dst)
507 inversecopies2.setdefault(src, []).append(dst)
508
508
509 copy = {}
509 copy = {}
510 diverge = {}
510 diverge = {}
511 renamedelete = {}
511 renamedelete = {}
512 allsources = set(inversecopies1) | set(inversecopies2)
512 allsources = set(inversecopies1) | set(inversecopies2)
513 for src in allsources:
513 for src in allsources:
514 dsts1 = inversecopies1.get(src)
514 dsts1 = inversecopies1.get(src)
515 dsts2 = inversecopies2.get(src)
515 dsts2 = inversecopies2.get(src)
516 if dsts1 and dsts2:
516 if dsts1 and dsts2:
517 # copied/renamed on both sides
517 # copied/renamed on both sides
518 if src not in m1 and src not in m2:
518 if src not in m1 and src not in m2:
519 # renamed on both sides
519 # renamed on both sides
520 dsts1 = set(dsts1)
520 dsts1 = set(dsts1)
521 dsts2 = set(dsts2)
521 dsts2 = set(dsts2)
522 # If there's some overlap in the rename destinations, we
522 # If there's some overlap in the rename destinations, we
523 # consider it not divergent. For example, if side 1 copies 'a'
523 # consider it not divergent. For example, if side 1 copies 'a'
524 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
524 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
525 # and 'd' and deletes 'a'.
525 # and 'd' and deletes 'a'.
526 if dsts1 & dsts2:
526 if dsts1 & dsts2:
527 for dst in (dsts1 & dsts2):
527 for dst in (dsts1 & dsts2):
528 copy[dst] = src
528 copy[dst] = src
529 else:
529 else:
530 diverge[src] = sorted(dsts1 | dsts2)
530 diverge[src] = sorted(dsts1 | dsts2)
531 elif src in m1 and src in m2:
531 elif src in m1 and src in m2:
532 # copied on both sides
532 # copied on both sides
533 dsts1 = set(dsts1)
533 dsts1 = set(dsts1)
534 dsts2 = set(dsts2)
534 dsts2 = set(dsts2)
535 for dst in (dsts1 & dsts2):
535 for dst in (dsts1 & dsts2):
536 copy[dst] = src
536 copy[dst] = src
537 # TODO: Handle cases where it was renamed on one side and copied
537 # TODO: Handle cases where it was renamed on one side and copied
538 # on the other side
538 # on the other side
539 elif dsts1:
539 elif dsts1:
540 # copied/renamed only on side 1
540 # copied/renamed only on side 1
541 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
541 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
542 copy, renamedelete)
542 copy, renamedelete)
543 elif dsts2:
543 elif dsts2:
544 # copied/renamed only on side 2
544 # copied/renamed only on side 2
545 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
545 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
546 copy, renamedelete)
546 copy, renamedelete)
547
547
548 renamedeleteset = set()
548 renamedeleteset = set()
549 divergeset = set()
549 divergeset = set()
550 for dsts in diverge.values():
550 for dsts in diverge.values():
551 divergeset.update(dsts)
551 divergeset.update(dsts)
552 for dsts in renamedelete.values():
552 for dsts in renamedelete.values():
553 renamedeleteset.update(dsts)
553 renamedeleteset.update(dsts)
554
554
555 # find interesting file sets from manifests
555 # find interesting file sets from manifests
556 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
556 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
557 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
557 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
558 u1 = sorted(addedinm1 - addedinm2)
558 u1 = sorted(addedinm1 - addedinm2)
559 u2 = sorted(addedinm2 - addedinm1)
559 u2 = sorted(addedinm2 - addedinm1)
560
560
561 header = " unmatched files in %s"
561 header = " unmatched files in %s"
562 if u1:
562 if u1:
563 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
563 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
564 if u2:
564 if u2:
565 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
565 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
566
566
567 fullcopy = copies1.copy()
567 fullcopy = copies1.copy()
568 fullcopy.update(copies2)
568 fullcopy.update(copies2)
569 if not fullcopy:
569 if not fullcopy:
570 return copy, {}, diverge, renamedelete, {}
570 return copy, {}, diverge, renamedelete, {}
571
571
572 if repo.ui.debugflag:
572 if repo.ui.debugflag:
573 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
573 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
574 "% = renamed and deleted):\n")
574 "% = renamed and deleted):\n")
575 for f in sorted(fullcopy):
575 for f in sorted(fullcopy):
576 note = ""
576 note = ""
577 if f in copy:
577 if f in copy:
578 note += "*"
578 note += "*"
579 if f in divergeset:
579 if f in divergeset:
580 note += "!"
580 note += "!"
581 if f in renamedeleteset:
581 if f in renamedeleteset:
582 note += "%"
582 note += "%"
583 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
583 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
584 note))
584 note))
585 del divergeset
585 del divergeset
586
586
587 repo.ui.debug(" checking for directory renames\n")
587 repo.ui.debug(" checking for directory renames\n")
588
588
589 # generate a directory move map
589 # generate a directory move map
590 d1, d2 = c1.dirs(), c2.dirs()
590 d1, d2 = c1.dirs(), c2.dirs()
591 # Hack for adding '', which is not otherwise added, to d1 and d2
591 # Hack for adding '', which is not otherwise added, to d1 and d2
592 d1.addpath('/')
592 d1.addpath('/')
593 d2.addpath('/')
593 d2.addpath('/')
594 invalid = set()
594 invalid = set()
595 dirmove = {}
595 dirmove = {}
596
596
597 # examine each file copy for a potential directory move, which is
597 # examine each file copy for a potential directory move, which is
598 # when all the files in a directory are moved to a new directory
598 # when all the files in a directory are moved to a new directory
599 for dst, src in fullcopy.iteritems():
599 for dst, src in fullcopy.iteritems():
600 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
600 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
601 if dsrc in invalid:
601 if dsrc in invalid:
602 # already seen to be uninteresting
602 # already seen to be uninteresting
603 continue
603 continue
604 elif dsrc in d1 and ddst in d1:
604 elif dsrc in d1 and ddst in d1:
605 # directory wasn't entirely moved locally
605 # directory wasn't entirely moved locally
606 invalid.add(dsrc)
606 invalid.add(dsrc)
607 elif dsrc in d2 and ddst in d2:
607 elif dsrc in d2 and ddst in d2:
608 # directory wasn't entirely moved remotely
608 # directory wasn't entirely moved remotely
609 invalid.add(dsrc)
609 invalid.add(dsrc)
610 elif dsrc in dirmove and dirmove[dsrc] != ddst:
610 elif dsrc in dirmove and dirmove[dsrc] != ddst:
611 # files from the same directory moved to two different places
611 # files from the same directory moved to two different places
612 invalid.add(dsrc)
612 invalid.add(dsrc)
613 else:
613 else:
614 # looks good so far
614 # looks good so far
615 dirmove[dsrc] = ddst
615 dirmove[dsrc] = ddst
616
616
617 for i in invalid:
617 for i in invalid:
618 if i in dirmove:
618 if i in dirmove:
619 del dirmove[i]
619 del dirmove[i]
620 del d1, d2, invalid
620 del d1, d2, invalid
621
621
622 if not dirmove:
622 if not dirmove:
623 return copy, {}, diverge, renamedelete, {}
623 return copy, {}, diverge, renamedelete, {}
624
624
625 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
625 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
626
626
627 for d in dirmove:
627 for d in dirmove:
628 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
628 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
629 (d, dirmove[d]))
629 (d, dirmove[d]))
630
630
631 movewithdir = {}
631 movewithdir = {}
632 # check unaccounted nonoverlapping files against directory moves
632 # check unaccounted nonoverlapping files against directory moves
633 for f in u1 + u2:
633 for f in u1 + u2:
634 if f not in fullcopy:
634 if f not in fullcopy:
635 for d in dirmove:
635 for d in dirmove:
636 if f.startswith(d):
636 if f.startswith(d):
637 # new file added in a directory that was moved, move it
637 # new file added in a directory that was moved, move it
638 df = dirmove[d] + f[len(d):]
638 df = dirmove[d] + f[len(d):]
639 if df not in copy:
639 if df not in copy:
640 movewithdir[f] = df
640 movewithdir[f] = df
641 repo.ui.debug((" pending file src: '%s' -> "
641 repo.ui.debug((" pending file src: '%s' -> "
642 "dst: '%s'\n") % (f, df))
642 "dst: '%s'\n") % (f, df))
643 break
643 break
644
644
645 return copy, movewithdir, diverge, renamedelete, dirmove
645 return copy, movewithdir, diverge, renamedelete, dirmove
646
646
647 def _heuristicscopytracing(repo, c1, c2, base):
647 def _heuristicscopytracing(repo, c1, c2, base):
648 """ Fast copytracing using filename heuristics
648 """ Fast copytracing using filename heuristics
649
649
650 Assumes that moves or renames are of following two types:
650 Assumes that moves or renames are of following two types:
651
651
652 1) Inside a directory only (same directory name but different filenames)
652 1) Inside a directory only (same directory name but different filenames)
653 2) Move from one directory to another
653 2) Move from one directory to another
654 (same filenames but different directory names)
654 (same filenames but different directory names)
655
655
656 Works only when there are no merge commits in the "source branch".
656 Works only when there are no merge commits in the "source branch".
657 Source branch is commits from base up to c2 not including base.
657 Source branch is commits from base up to c2 not including base.
658
658
659 If merge is involved it fallbacks to _fullcopytracing().
659 If merge is involved it fallbacks to _fullcopytracing().
660
660
661 Can be used by setting the following config:
661 Can be used by setting the following config:
662
662
663 [experimental]
663 [experimental]
664 copytrace = heuristics
664 copytrace = heuristics
665
665
666 In some cases the copy/move candidates found by heuristics can be very large
666 In some cases the copy/move candidates found by heuristics can be very large
667 in number and that will make the algorithm slow. The number of possible
667 in number and that will make the algorithm slow. The number of possible
668 candidates to check can be limited by using the config
668 candidates to check can be limited by using the config
669 `experimental.copytrace.movecandidateslimit` which defaults to 100.
669 `experimental.copytrace.movecandidateslimit` which defaults to 100.
670 """
670 """
671
671
672 if c1.rev() is None:
672 if c1.rev() is None:
673 c1 = c1.p1()
673 c1 = c1.p1()
674 if c2.rev() is None:
674 if c2.rev() is None:
675 c2 = c2.p1()
675 c2 = c2.p1()
676
676
677 copies = {}
677 copies = {}
678
678
679 changedfiles = set()
679 changedfiles = set()
680 m1 = c1.manifest()
680 m1 = c1.manifest()
681 if not repo.revs('%d::%d', base.rev(), c2.rev()):
681 if not repo.revs('%d::%d', base.rev(), c2.rev()):
682 # If base is not in c2 branch, we switch to fullcopytracing
682 # If base is not in c2 branch, we switch to fullcopytracing
683 repo.ui.debug("switching to full copytracing as base is not "
683 repo.ui.debug("switching to full copytracing as base is not "
684 "an ancestor of c2\n")
684 "an ancestor of c2\n")
685 return _fullcopytracing(repo, c1, c2, base)
685 return _fullcopytracing(repo, c1, c2, base)
686
686
687 ctx = c2
687 ctx = c2
688 while ctx != base:
688 while ctx != base:
689 if len(ctx.parents()) == 2:
689 if len(ctx.parents()) == 2:
690 # To keep things simple let's not handle merges
690 # To keep things simple let's not handle merges
691 repo.ui.debug("switching to full copytracing because of merges\n")
691 repo.ui.debug("switching to full copytracing because of merges\n")
692 return _fullcopytracing(repo, c1, c2, base)
692 return _fullcopytracing(repo, c1, c2, base)
693 changedfiles.update(ctx.files())
693 changedfiles.update(ctx.files())
694 ctx = ctx.p1()
694 ctx = ctx.p1()
695
695
696 cp = _forwardcopies(base, c2)
696 cp = _forwardcopies(base, c2)
697 for dst, src in cp.iteritems():
697 for dst, src in cp.iteritems():
698 if src in m1:
698 if src in m1:
699 copies[dst] = src
699 copies[dst] = src
700
700
701 # file is missing if it isn't present in the destination, but is present in
701 # file is missing if it isn't present in the destination, but is present in
702 # the base and present in the source.
702 # the base and present in the source.
703 # Presence in the base is important to exclude added files, presence in the
703 # Presence in the base is important to exclude added files, presence in the
704 # source is important to exclude removed files.
704 # source is important to exclude removed files.
705 filt = lambda f: f not in m1 and f in base and f in c2
705 filt = lambda f: f not in m1 and f in base and f in c2
706 missingfiles = [f for f in changedfiles if filt(f)]
706 missingfiles = [f for f in changedfiles if filt(f)]
707
707
708 if missingfiles:
708 if missingfiles:
709 basenametofilename = collections.defaultdict(list)
709 basenametofilename = collections.defaultdict(list)
710 dirnametofilename = collections.defaultdict(list)
710 dirnametofilename = collections.defaultdict(list)
711
711
712 for f in m1.filesnotin(base.manifest()):
712 for f in m1.filesnotin(base.manifest()):
713 basename = os.path.basename(f)
713 basename = os.path.basename(f)
714 dirname = os.path.dirname(f)
714 dirname = os.path.dirname(f)
715 basenametofilename[basename].append(f)
715 basenametofilename[basename].append(f)
716 dirnametofilename[dirname].append(f)
716 dirnametofilename[dirname].append(f)
717
717
718 for f in missingfiles:
718 for f in missingfiles:
719 basename = os.path.basename(f)
719 basename = os.path.basename(f)
720 dirname = os.path.dirname(f)
720 dirname = os.path.dirname(f)
721 samebasename = basenametofilename[basename]
721 samebasename = basenametofilename[basename]
722 samedirname = dirnametofilename[dirname]
722 samedirname = dirnametofilename[dirname]
723 movecandidates = samebasename + samedirname
723 movecandidates = samebasename + samedirname
724 # f is guaranteed to be present in c2, that's why
724 # f is guaranteed to be present in c2, that's why
725 # c2.filectx(f) won't fail
725 # c2.filectx(f) won't fail
726 f2 = c2.filectx(f)
726 f2 = c2.filectx(f)
727 # we can have a lot of candidates which can slow down the heuristics
727 # we can have a lot of candidates which can slow down the heuristics
728 # config value to limit the number of candidates moves to check
728 # config value to limit the number of candidates moves to check
729 maxcandidates = repo.ui.configint('experimental',
729 maxcandidates = repo.ui.configint('experimental',
730 'copytrace.movecandidateslimit')
730 'copytrace.movecandidateslimit')
731
731
732 if len(movecandidates) > maxcandidates:
732 if len(movecandidates) > maxcandidates:
733 repo.ui.status(_("skipping copytracing for '%s', more "
733 repo.ui.status(_("skipping copytracing for '%s', more "
734 "candidates than the limit: %d\n")
734 "candidates than the limit: %d\n")
735 % (f, len(movecandidates)))
735 % (f, len(movecandidates)))
736 continue
736 continue
737
737
738 for candidate in movecandidates:
738 for candidate in movecandidates:
739 f1 = c1.filectx(candidate)
739 f1 = c1.filectx(candidate)
740 if _related(f1, f2):
740 if _related(f1, f2):
741 # if there are a few related copies then we'll merge
741 # if there are a few related copies then we'll merge
742 # changes into all of them. This matches the behaviour
742 # changes into all of them. This matches the behaviour
743 # of upstream copytracing
743 # of upstream copytracing
744 copies[candidate] = f
744 copies[candidate] = f
745
745
746 return copies, {}, {}, {}, {}
746 return copies, {}, {}, {}, {}
747
747
748 def _related(f1, f2):
748 def _related(f1, f2):
749 """return True if f1 and f2 filectx have a common ancestor
749 """return True if f1 and f2 filectx have a common ancestor
750
750
751 Walk back to common ancestor to see if the two files originate
751 Walk back to common ancestor to see if the two files originate
752 from the same file. Since workingfilectx's rev() is None it messes
752 from the same file. Since workingfilectx's rev() is None it messes
753 up the integer comparison logic, hence the pre-step check for
753 up the integer comparison logic, hence the pre-step check for
754 None (f1 and f2 can only be workingfilectx's initially).
754 None (f1 and f2 can only be workingfilectx's initially).
755 """
755 """
756
756
757 if f1 == f2:
757 if f1 == f2:
758 return True # a match
758 return True # a match
759
759
760 g1, g2 = f1.ancestors(), f2.ancestors()
760 g1, g2 = f1.ancestors(), f2.ancestors()
761 try:
761 try:
762 f1r, f2r = f1.linkrev(), f2.linkrev()
762 f1r, f2r = f1.linkrev(), f2.linkrev()
763
763
764 if f1r is None:
764 if f1r is None:
765 f1 = next(g1)
765 f1 = next(g1)
766 if f2r is None:
766 if f2r is None:
767 f2 = next(g2)
767 f2 = next(g2)
768
768
769 while True:
769 while True:
770 f1r, f2r = f1.linkrev(), f2.linkrev()
770 f1r, f2r = f1.linkrev(), f2.linkrev()
771 if f1r > f2r:
771 if f1r > f2r:
772 f1 = next(g1)
772 f1 = next(g1)
773 elif f2r > f1r:
773 elif f2r > f1r:
774 f2 = next(g2)
774 f2 = next(g2)
775 else: # f1 and f2 point to files in the same linkrev
775 else: # f1 and f2 point to files in the same linkrev
776 return f1 == f2 # true if they point to the same file
776 return f1 == f2 # true if they point to the same file
777 except StopIteration:
777 except StopIteration:
778 return False
778 return False
779
779
780 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
780 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
781 """reproduce copies from fromrev to rev in the dirstate
781 """reproduce copies from fromrev to rev in the dirstate
782
782
783 If skiprev is specified, it's a revision that should be used to
783 If skiprev is specified, it's a revision that should be used to
784 filter copy records. Any copies that occur between fromrev and
784 filter copy records. Any copies that occur between fromrev and
785 skiprev will not be duplicated, even if they appear in the set of
785 skiprev will not be duplicated, even if they appear in the set of
786 copies between fromrev and rev.
786 copies between fromrev and rev.
787 """
787 """
788 exclude = {}
788 exclude = {}
789 ctraceconfig = repo.ui.config('experimental', 'copytrace')
789 ctraceconfig = repo.ui.config('experimental', 'copytrace')
790 bctrace = stringutil.parsebool(ctraceconfig)
790 bctrace = stringutil.parsebool(ctraceconfig)
791 if (skiprev is not None and
791 if (skiprev is not None and
792 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
792 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
793 # copytrace='off' skips this line, but not the entire function because
793 # copytrace='off' skips this line, but not the entire function because
794 # the line below is O(size of the repo) during a rebase, while the rest
794 # the line below is O(size of the repo) during a rebase, while the rest
795 # of the function is much faster (and is required for carrying copy
795 # of the function is much faster (and is required for carrying copy
796 # metadata across the rebase anyway).
796 # metadata across the rebase anyway).
797 exclude = pathcopies(repo[fromrev], repo[skiprev])
797 exclude = pathcopies(repo[fromrev], repo[skiprev])
798 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
798 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
799 # copies.pathcopies returns backward renames, so dst might not
800 # actually be in the dirstate
801 if dst in exclude:
799 if dst in exclude:
802 continue
800 continue
803 wctx[dst].markcopied(src)
801 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now