copies: document cases in _chain()...
Martin von Zweigbergk
r42413:d1c2688e default
@@ -1,786 +1,806 @@
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendant
31 ancestor of a or b but not both, except when a or b is a direct descendant
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendant of b or vice-versa.
107 # This only occurs when a is a descendant of b or vice-versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
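The heap-based walk above is compact but subtle. As a point of reference, here is a minimal hedged sketch that computes the same quantity naively from explicit ancestor sets on a toy DAG; `naive_findlimit` and the `parents` map are illustrative only, not part of Mercurial.

```python
def naive_findlimit(parents, a, b):
    """Earliest rev that is an ancestor of a or b but not both, capped by a and b."""
    def ancestors(r):
        seen, stack = {r}, [r]
        while stack:
            for p in parents[stack.pop()]:
                if p not in seen:
                    seen.add(p)
                    stack.append(p)
        return seen

    onlyone = ancestors(a) ^ ancestors(b)      # ancestors of exactly one side
    limit = min(onlyone) if onlyone else min(a, b)
    return min(limit, a, b)

# The issue4405 graph from the comment above: 1 and 2 branch off 0, 3 sits on 2.
parents = {0: [], 1: [0], 2: [0], 3: [2]}
assert naive_findlimit(parents, 3, 0) == 0     # a and b themselves are candidates
```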
110 def _chain(src, dst, a, b):
110 def _chain(src, dst, a, b):
111 """chain two sets of copies a->b"""
111 """chain two sets of copies 'a' and 'b'"""
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
118 #
119 # case src mid dst result
120 # 1 x y - -
121 # 2 x y y x->y
122 # 3 x y x -
123 # 4 x y z x->z
124 # 5 - x y -
125 # 6 x x y x->y
126
127 # Initialize result ('t') from 'a'. This catches cases 1 & 2. We'll remove
128 # case 1 later. We'll also catch cases 3 & 4 here. Case 4 will be
129 # overwritten later, and case 3 will be removed later.
112 t = a.copy()
130 t = a.copy()
113 for k, v in b.iteritems():
131 for k, v in b.iteritems():
114 if v in t:
132 if v in t:
115 # found a chain
133 # found a chain, i.e. cases 3 & 4.
116 if t[v] != k:
134 if t[v] != k:
117 # file wasn't renamed back to itself
135 # file wasn't renamed back to itself (i.e. case 4, not 3)
118 t[k] = t[v]
136 t[k] = t[v]
119 if v not in dst:
137 if v not in dst:
120 # chain was a rename, not a copy
138 # chain was a rename, not a copy
139 # this deletes the copy for 'y' in case 4
121 del t[v]
140 del t[v]
122 if v in src:
141 if v in src:
123 # file is a copy of an existing file
142 # file is a copy of an existing file, i.e. case 6.
124 t[k] = v
143 t[k] = v
125
144
126 for k, v in list(t.items()):
145 for k, v in list(t.items()):
127 # remove criss-crossed copies
146 # remove criss-crossed copies, i.e. case 3
128 if k in src and v in dst:
147 if k in src and v in dst:
129 del t[k]
148 del t[k]
130 # remove copies to files that were then removed
149 # remove copies to files that were then removed, i.e. case 1
150 # and file 'y' in cases 3 & 4 (in case of rename)
131 elif k not in dst:
151 elif k not in dst:
132 del t[k]
152 del t[k]
133
153
134 return t
154 return t
135
155
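The new case table is easiest to digest by feeding `_chain()` tiny inputs. A hedged sketch: `_chain()` only probes `src` and `dst` with the `in` operator, so plain sets of file names (and plain dicts for the two copy maps) are enough to reproduce the table; running it as-is assumes the Python 2 setup this module targets, since it calls `iteritems()`.

```python
from mercurial.copies import _chain

src = {'x'}                                          # files present in 'src'
print(_chain(src, {'z'}, {'y': 'x'}, {'z': 'y'}))    # case 4: {'z': 'x'}
print(_chain(src, {'y'}, {'y': 'x'}, {}))            # case 2: {'y': 'x'}
print(_chain(src, {'y'}, {}, {'y': 'x'}))            # case 6: {'y': 'x'}
print(_chain(src, {'x'}, {'y': 'x'}, {'x': 'y'}))    # case 3: {} (renamed back)
```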
136 def _tracefile(fctx, am, limit=node.nullrev):
156 def _tracefile(fctx, am, limit=node.nullrev):
137 """return file context that is the ancestor of fctx present in ancestor
157 """return file context that is the ancestor of fctx present in ancestor
138 manifest am, stopping after the first ancestor lower than limit"""
158 manifest am, stopping after the first ancestor lower than limit"""
139
159
140 for f in fctx.ancestors():
160 for f in fctx.ancestors():
141 if am.get(f.path(), None) == f.filenode():
161 if am.get(f.path(), None) == f.filenode():
142 return f
162 return f
143 if limit >= 0 and not f.isintroducedafter(limit):
163 if limit >= 0 and not f.isintroducedafter(limit):
144 return None
164 return None
145
165
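A hedged restatement of the walk above with plain data standing in for filectxs and the ancestor manifest; `toy_tracefile` is illustrative only and ignores the `limit` cutoff.

```python
def toy_tracefile(ancestry, am):
    # 'ancestry' yields (path, filenode) pairs newest-first, like fctx.ancestors();
    # 'am' maps path -> filenode in the ancestor manifest.
    for path, filenode in ancestry:
        if am.get(path) == filenode:
            return path
    return None

am = {'a.txt': 'node1'}
print(toy_tracefile([('b.txt', 'node2'), ('a.txt', 'node1')], am))  # -> 'a.txt'
```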
146 def _dirstatecopies(repo, match=None):
166 def _dirstatecopies(repo, match=None):
147 ds = repo.dirstate
167 ds = repo.dirstate
148 c = ds.copies().copy()
168 c = ds.copies().copy()
149 for k in list(c):
169 for k in list(c):
150 if ds[k] not in 'anm' or (match and not match(k)):
170 if ds[k] not in 'anm' or (match and not match(k)):
151 del c[k]
171 del c[k]
152 return c
172 return c
153
173
154 def _computeforwardmissing(a, b, match=None):
174 def _computeforwardmissing(a, b, match=None):
155 """Computes which files are in b but not a.
175 """Computes which files are in b but not a.
156 This is its own function so extensions can easily wrap this call to see what
176 This is its own function so extensions can easily wrap this call to see what
157 files _forwardcopies is about to process.
177 files _forwardcopies is about to process.
158 """
178 """
159 ma = a.manifest()
179 ma = a.manifest()
160 mb = b.manifest()
180 mb = b.manifest()
161 return mb.filesnotin(ma, match=match)
181 return mb.filesnotin(ma, match=match)
162
182
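A tiny illustration of what "forward missing" means here, with plain dicts standing in for the two manifests:

```python
ma = {'a.txt': 'node1'}                      # manifest of the ancestor 'a'
mb = {'a.txt': 'node1', 'b.txt': 'node2'}    # manifest of the descendant 'b'
missing = set(mb) - set(ma)
print(sorted(missing))    # -> ['b.txt']: the files _forwardcopies() will trace
```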
163 def usechangesetcentricalgo(repo):
183 def usechangesetcentricalgo(repo):
164 """Checks if we should use changeset-centric copy algorithms"""
184 """Checks if we should use changeset-centric copy algorithms"""
165 return (repo.ui.config('experimental', 'copies.read-from') in
185 return (repo.ui.config('experimental', 'copies.read-from') in
166 ('changeset-only', 'compatibility'))
186 ('changeset-only', 'compatibility'))
167
187
168 def _committedforwardcopies(a, b, match):
188 def _committedforwardcopies(a, b, match):
169 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
189 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
170 # files might have to be traced back to the fctx parent of the last
190 # files might have to be traced back to the fctx parent of the last
171 # one-side-only changeset, but not further back than that
191 # one-side-only changeset, but not further back than that
172 repo = a._repo
192 repo = a._repo
173
193
174 if usechangesetcentricalgo(repo):
194 if usechangesetcentricalgo(repo):
175 return _changesetforwardcopies(a, b, match)
195 return _changesetforwardcopies(a, b, match)
176
196
177 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
197 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
178 dbg = repo.ui.debug
198 dbg = repo.ui.debug
179 if debug:
199 if debug:
180 dbg('debug.copies: looking into rename from %s to %s\n'
200 dbg('debug.copies: looking into rename from %s to %s\n'
181 % (a, b))
201 % (a, b))
182 limit = _findlimit(repo, a, b)
202 limit = _findlimit(repo, a, b)
183 if debug:
203 if debug:
184 dbg('debug.copies: search limit: %d\n' % limit)
204 dbg('debug.copies: search limit: %d\n' % limit)
185 am = a.manifest()
205 am = a.manifest()
186
206
187 # find where new files came from
207 # find where new files came from
188 # we currently don't try to find where old files went, too expensive
208 # we currently don't try to find where old files went, too expensive
189 # this means we can miss a case like 'hg rm b; hg cp a b'
209 # this means we can miss a case like 'hg rm b; hg cp a b'
190 cm = {}
210 cm = {}
191
211
192 # Computing the forward missing is quite expensive on large manifests, since
212 # Computing the forward missing is quite expensive on large manifests, since
193 # it compares the entire manifests. We can optimize it in the common use
213 # it compares the entire manifests. We can optimize it in the common use
194 # case of computing what copies are in a commit versus its parent (like
214 # case of computing what copies are in a commit versus its parent (like
195 # during a rebase or histedit). Note, we exclude merge commits from this
215 # during a rebase or histedit). Note, we exclude merge commits from this
196 # optimization, since the ctx.files() for a merge commit is not correct for
216 # optimization, since the ctx.files() for a merge commit is not correct for
197 # this comparison.
217 # this comparison.
198 forwardmissingmatch = match
218 forwardmissingmatch = match
199 if b.p1() == a and b.p2().node() == node.nullid:
219 if b.p1() == a and b.p2().node() == node.nullid:
200 filesmatcher = matchmod.exact(b.files())
220 filesmatcher = matchmod.exact(b.files())
201 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
221 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
202 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
222 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
203
223
204 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
224 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
205
225
206 if debug:
226 if debug:
207 dbg('debug.copies: missing files to search: %d\n' % len(missing))
227 dbg('debug.copies: missing files to search: %d\n' % len(missing))
208
228
209 for f in sorted(missing):
229 for f in sorted(missing):
210 if debug:
230 if debug:
211 dbg('debug.copies: tracing file: %s\n' % f)
231 dbg('debug.copies: tracing file: %s\n' % f)
212 fctx = b[f]
232 fctx = b[f]
213 fctx._ancestrycontext = ancestrycontext
233 fctx._ancestrycontext = ancestrycontext
214
234
215 if debug:
235 if debug:
216 start = util.timer()
236 start = util.timer()
217 ofctx = _tracefile(fctx, am, limit)
237 ofctx = _tracefile(fctx, am, limit)
218 if ofctx:
238 if ofctx:
219 if debug:
239 if debug:
220 dbg('debug.copies: rename of: %s\n' % ofctx._path)
240 dbg('debug.copies: rename of: %s\n' % ofctx._path)
221 cm[f] = ofctx.path()
241 cm[f] = ofctx.path()
222 if debug:
242 if debug:
223 dbg('debug.copies: time: %f seconds\n'
243 dbg('debug.copies: time: %f seconds\n'
224 % (util.timer() - start))
244 % (util.timer() - start))
225 return cm
245 return cm
226
246
227 def _changesetforwardcopies(a, b, match):
247 def _changesetforwardcopies(a, b, match):
228 if a.rev() == node.nullrev:
248 if a.rev() == node.nullrev:
229 return {}
249 return {}
230
250
231 repo = a.repo()
251 repo = a.repo()
232 children = {}
252 children = {}
233 cl = repo.changelog
253 cl = repo.changelog
234 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
254 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
235 for r in missingrevs:
255 for r in missingrevs:
236 for p in cl.parentrevs(r):
256 for p in cl.parentrevs(r):
237 if p == node.nullrev:
257 if p == node.nullrev:
238 continue
258 continue
239 if p not in children:
259 if p not in children:
240 children[p] = [r]
260 children[p] = [r]
241 else:
261 else:
242 children[p].append(r)
262 children[p].append(r)
243
263
244 roots = set(children) - set(missingrevs)
264 roots = set(children) - set(missingrevs)
245 # 'work' contains 3-tuples of a (revision number, parent number, copies).
265 # 'work' contains 3-tuples of a (revision number, parent number, copies).
246 # The parent number is only used for knowing which parent the copies dict
266 # The parent number is only used for knowing which parent the copies dict
247 # came from.
267 # came from.
248 work = [(r, 1, {}) for r in roots]
268 work = [(r, 1, {}) for r in roots]
249 heapq.heapify(work)
269 heapq.heapify(work)
250 while work:
270 while work:
251 r, i1, copies1 = heapq.heappop(work)
271 r, i1, copies1 = heapq.heappop(work)
252 if work and work[0][0] == r:
272 if work and work[0][0] == r:
253 # We are tracing copies from both parents
273 # We are tracing copies from both parents
254 r, i2, copies2 = heapq.heappop(work)
274 r, i2, copies2 = heapq.heappop(work)
255 copies = {}
275 copies = {}
256 ctx = repo[r]
276 ctx = repo[r]
257 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
277 p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
258 allcopies = set(copies1) | set(copies2)
278 allcopies = set(copies1) | set(copies2)
259 # TODO: perhaps this filtering should be done as long as ctx
279 # TODO: perhaps this filtering should be done as long as ctx
260 # is merge, whether or not we're tracing from both parent.
280 # is merge, whether or not we're tracing from both parent.
261 for dst in allcopies:
281 for dst in allcopies:
262 if not match(dst):
282 if not match(dst):
263 continue
283 continue
264 if dst not in copies2:
284 if dst not in copies2:
265 # Copied on p1 side: mark as copy from p1 side if it didn't
285 # Copied on p1 side: mark as copy from p1 side if it didn't
266 # already exist on p2 side
286 # already exist on p2 side
267 if dst not in p2man:
287 if dst not in p2man:
268 copies[dst] = copies1[dst]
288 copies[dst] = copies1[dst]
269 elif dst not in copies1:
289 elif dst not in copies1:
270 # Copied on p2 side: mark as copy from p2 side if it didn't
290 # Copied on p2 side: mark as copy from p2 side if it didn't
271 # already exist on p1 side
291 # already exist on p1 side
272 if dst not in p1man:
292 if dst not in p1man:
273 copies[dst] = copies2[dst]
293 copies[dst] = copies2[dst]
274 else:
294 else:
275 # Copied on both sides: mark as copy from p1 side
295 # Copied on both sides: mark as copy from p1 side
276 copies[dst] = copies1[dst]
296 copies[dst] = copies1[dst]
277 else:
297 else:
278 copies = copies1
298 copies = copies1
279 if r == b.rev():
299 if r == b.rev():
280 return copies
300 return copies
281 for c in children[r]:
301 for c in children[r]:
282 childctx = repo[c]
302 childctx = repo[c]
283 if r == childctx.p1().rev():
303 if r == childctx.p1().rev():
284 parent = 1
304 parent = 1
285 childcopies = childctx.p1copies()
305 childcopies = childctx.p1copies()
286 else:
306 else:
287 assert r == childctx.p2().rev()
307 assert r == childctx.p2().rev()
288 parent = 2
308 parent = 2
289 childcopies = childctx.p2copies()
309 childcopies = childctx.p2copies()
290 if not match.always():
310 if not match.always():
291 childcopies = {dst: src for dst, src in childcopies.items()
311 childcopies = {dst: src for dst, src in childcopies.items()
292 if match(dst)}
312 if match(dst)}
293 childcopies = _chain(a, childctx, copies, childcopies)
313 childcopies = _chain(a, childctx, copies, childcopies)
294 heapq.heappush(work, (c, parent, childcopies))
314 heapq.heappush(work, (c, parent, childcopies))
295 assert False
315 assert False
296
316
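The traversal above first builds a child map restricted to the revisions missing on the 'a' side. A small hedged sketch of that step, with a plain dict standing in for the changelog:

```python
def toy_children(parentrevs, missingrevs, nullrev=-1):
    children = {}
    for r in missingrevs:
        for p in parentrevs[r]:
            if p != nullrev:
                children.setdefault(p, []).append(r)
    return children

# revs 2 and 3 are missing; 2's parents are (1, null), 3's are (1, 2)
print(toy_children({2: (1, -1), 3: (1, 2)}, [2, 3]))   # -> {1: [2, 3], 2: [3]}
```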
297 def _forwardcopies(a, b, match=None):
317 def _forwardcopies(a, b, match=None):
298 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
318 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
299
319
300 match = a.repo().narrowmatch(match)
320 match = a.repo().narrowmatch(match)
301 # check for working copy
321 # check for working copy
302 if b.rev() is None:
322 if b.rev() is None:
303 if a == b.p1():
323 if a == b.p1():
304 # short-circuit to avoid issues with merge states
324 # short-circuit to avoid issues with merge states
305 return _dirstatecopies(b._repo, match)
325 return _dirstatecopies(b._repo, match)
306
326
307 cm = _committedforwardcopies(a, b.p1(), match)
327 cm = _committedforwardcopies(a, b.p1(), match)
308 # combine copies from dirstate if necessary
328 # combine copies from dirstate if necessary
309 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
329 return _chain(a, b, cm, _dirstatecopies(b._repo, match))
310 return _committedforwardcopies(a, b, match)
330 return _committedforwardcopies(a, b, match)
311
331
312 def _backwardrenames(a, b, match):
332 def _backwardrenames(a, b, match):
313 if a._repo.ui.config('experimental', 'copytrace') == 'off':
333 if a._repo.ui.config('experimental', 'copytrace') == 'off':
314 return {}
334 return {}
315
335
316 # Even though we're not taking copies into account, 1:n rename situations
336 # Even though we're not taking copies into account, 1:n rename situations
317 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
337 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
318 # arbitrarily pick one of the renames.
338 # arbitrarily pick one of the renames.
319 # We don't want to pass in "match" here, since that would filter
339 # We don't want to pass in "match" here, since that would filter
320 # the destination by it. Since we're reversing the copies, we want
340 # the destination by it. Since we're reversing the copies, we want
321 # to filter the source instead.
341 # to filter the source instead.
322 f = _forwardcopies(b, a)
342 f = _forwardcopies(b, a)
323 r = {}
343 r = {}
324 for k, v in sorted(f.iteritems()):
344 for k, v in sorted(f.iteritems()):
325 if match and not match(v):
345 if match and not match(v):
326 continue
346 continue
327 # remove copies
347 # remove copies
328 if v in a:
348 if v in a:
329 continue
349 continue
330 r[v] = k
350 r[v] = k
331 return r
351 return r
332
352
333 def pathcopies(x, y, match=None):
353 def pathcopies(x, y, match=None):
334 """find {dst@y: src@x} copy mapping for directed compare"""
354 """find {dst@y: src@x} copy mapping for directed compare"""
335 repo = x._repo
355 repo = x._repo
336 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
356 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
337 if debug:
357 if debug:
338 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
358 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
339 % (x, y))
359 % (x, y))
340 if x == y or not x or not y:
360 if x == y or not x or not y:
341 return {}
361 return {}
342 a = y.ancestor(x)
362 a = y.ancestor(x)
343 if a == x:
363 if a == x:
344 if debug:
364 if debug:
345 repo.ui.debug('debug.copies: search mode: forward\n')
365 repo.ui.debug('debug.copies: search mode: forward\n')
346 return _forwardcopies(x, y, match=match)
366 return _forwardcopies(x, y, match=match)
347 if a == y:
367 if a == y:
348 if debug:
368 if debug:
349 repo.ui.debug('debug.copies: search mode: backward\n')
369 repo.ui.debug('debug.copies: search mode: backward\n')
350 return _backwardrenames(x, y, match=match)
370 return _backwardrenames(x, y, match=match)
351 if debug:
371 if debug:
352 repo.ui.debug('debug.copies: search mode: combined\n')
372 repo.ui.debug('debug.copies: search mode: combined\n')
353 return _chain(x, y, _backwardrenames(x, a, match=match),
373 return _chain(x, y, _backwardrenames(x, a, match=match),
354 _forwardcopies(a, y, match=match))
374 _forwardcopies(a, y, match=match))
355
375
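A hypothetical usage sketch of the public entry point above. It assumes a repository in the current directory; the variable names are ours, not Mercurial's.

```python
from mercurial import hg, ui as uimod
from mercurial import copies as copiesmod

repo = hg.repository(uimod.ui.load(), '.')
new = repo['.']
old = new.p1()
# 'old' is an ancestor of 'new', so this takes the "forward" search mode above.
for dst, src in sorted(copiesmod.pathcopies(old, new).items()):
    print('%s renamed/copied from %s' % (dst, src))
```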
356 def mergecopies(repo, c1, c2, base):
376 def mergecopies(repo, c1, c2, base):
357 """
377 """
358 Finds moves and copies between context c1 and c2 that are relevant for
378 Finds moves and copies between context c1 and c2 that are relevant for
359 merging. 'base' will be used as the merge base.
379 merging. 'base' will be used as the merge base.
360
380
361 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
381 Copytracing is used in commands like rebase, merge, unshelve, etc. to merge
362 files that were moved/copied in one merge parent and modified in another.
382 files that were moved/copied in one merge parent and modified in another.
363 For example:
383 For example:
364
384
365 o ---> 4 another commit
385 o ---> 4 another commit
366 |
386 |
367 | o ---> 3 commit that modifies a.txt
387 | o ---> 3 commit that modifies a.txt
368 | /
388 | /
369 o / ---> 2 commit that moves a.txt to b.txt
389 o / ---> 2 commit that moves a.txt to b.txt
370 |/
390 |/
371 o ---> 1 merge base
391 o ---> 1 merge base
372
392
373 If we try to rebase revision 3 on revision 4, since there is no a.txt in
393 If we try to rebase revision 3 on revision 4, since there is no a.txt in
374 revision 4, and if the user has copytrace disabled, we print the following
394 revision 4, and if the user has copytrace disabled, we print the following
375 message:
395 message:
376
396
377 ```other changed <file> which local deleted```
397 ```other changed <file> which local deleted```
378
398
379 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
399 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
380 "dirmove".
400 "dirmove".
381
401
382 "copy" is a mapping from destination name -> source name,
402 "copy" is a mapping from destination name -> source name,
383 where source is in c1 and destination is in c2 or vice-versa.
403 where source is in c1 and destination is in c2 or vice-versa.
384
404
385 "movewithdir" is a mapping from source name -> destination name,
405 "movewithdir" is a mapping from source name -> destination name,
386 where the file at source present in one context but not the other
406 where the file at source present in one context but not the other
387 needs to be moved to destination by the merge process, because the
407 needs to be moved to destination by the merge process, because the
388 other context moved the directory it is in.
408 other context moved the directory it is in.
389
409
390 "diverge" is a mapping of source name -> list of destination names
410 "diverge" is a mapping of source name -> list of destination names
391 for divergent renames.
411 for divergent renames.
392
412
393 "renamedelete" is a mapping of source name -> list of destination
413 "renamedelete" is a mapping of source name -> list of destination
394 names for files deleted in c1 that were renamed in c2 or vice-versa.
414 names for files deleted in c1 that were renamed in c2 or vice-versa.
395
415
396 "dirmove" is a mapping of detected source dir -> destination dir renames.
416 "dirmove" is a mapping of detected source dir -> destination dir renames.
397 This is needed for handling changes to new files previously grafted into
417 This is needed for handling changes to new files previously grafted into
398 renamed directories.
418 renamed directories.
399
419
400 This function calls different copytracing algorithms based on config.
420 This function calls different copytracing algorithms based on config.
401 """
421 """
402 # avoid silly behavior for update from empty dir
422 # avoid silly behavior for update from empty dir
403 if not c1 or not c2 or c1 == c2:
423 if not c1 or not c2 or c1 == c2:
404 return {}, {}, {}, {}, {}
424 return {}, {}, {}, {}, {}
405
425
406 narrowmatch = c1.repo().narrowmatch()
426 narrowmatch = c1.repo().narrowmatch()
407
427
408 # avoid silly behavior for parent -> working dir
428 # avoid silly behavior for parent -> working dir
409 if c2.node() is None and c1.node() == repo.dirstate.p1():
429 if c2.node() is None and c1.node() == repo.dirstate.p1():
410 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
430 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
411
431
412 copytracing = repo.ui.config('experimental', 'copytrace')
432 copytracing = repo.ui.config('experimental', 'copytrace')
413 if stringutil.parsebool(copytracing) is False:
433 if stringutil.parsebool(copytracing) is False:
414 # stringutil.parsebool() returns None when it is unable to parse the
434 # stringutil.parsebool() returns None when it is unable to parse the
415 # value, so in such cases we keep copytracing enabled
435 # value, so in such cases we keep copytracing enabled
416 return {}, {}, {}, {}, {}
436 return {}, {}, {}, {}, {}
417
437
418 if usechangesetcentricalgo(repo):
438 if usechangesetcentricalgo(repo):
419 # The heuristics don't make sense when we need changeset-centric algos
439 # The heuristics don't make sense when we need changeset-centric algos
420 return _fullcopytracing(repo, c1, c2, base)
440 return _fullcopytracing(repo, c1, c2, base)
421
441
422 # Copy trace disabling is explicitly below the node == p1 logic above
442 # Copy trace disabling is explicitly below the node == p1 logic above
423 # because the logic above is required for a simple copy to be kept across a
443 # because the logic above is required for a simple copy to be kept across a
424 # rebase.
444 # rebase.
425 if copytracing == 'heuristics':
445 if copytracing == 'heuristics':
426 # Do full copytracing if only non-public revisions are involved as
446 # Do full copytracing if only non-public revisions are involved as
427 # that will be fast enough and will also cover the copies which could
447 # that will be fast enough and will also cover the copies which could
428 # be missed by heuristics
448 # be missed by heuristics
429 if _isfullcopytraceable(repo, c1, base):
449 if _isfullcopytraceable(repo, c1, base):
430 return _fullcopytracing(repo, c1, c2, base)
450 return _fullcopytracing(repo, c1, c2, base)
431 return _heuristicscopytracing(repo, c1, c2, base)
451 return _heuristicscopytracing(repo, c1, c2, base)
432 else:
452 else:
433 return _fullcopytracing(repo, c1, c2, base)
453 return _fullcopytracing(repo, c1, c2, base)
434
454
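A hedged sketch of calling mergecopies() just to show the shape of the five return values. It assumes a repository at '.' and, purely for illustration, that 'tip' is the other head being merged.

```python
from mercurial import hg, ui as uimod, copies as copiesmod

repo = hg.repository(uimod.ui.load(), '.')
c1, c2 = repo['.'], repo['tip']
base = c1.ancestor(c2)
copy, movewithdir, diverge, renamedelete, dirmove = copiesmod.mergecopies(
    repo, c1, c2, base)
print('%d copies to merge, %d divergent renames, %d directory moves'
      % (len(copy), len(diverge), len(dirmove)))
```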
435 def _isfullcopytraceable(repo, c1, base):
455 def _isfullcopytraceable(repo, c1, base):
436 """ Checks that if base, source and destination are all no-public branches,
456 """ Checks that if base, source and destination are all no-public branches,
437 if yes let's use the full copytrace algorithm for increased capabilities
457 if yes let's use the full copytrace algorithm for increased capabilities
438 since it will be fast enough.
458 since it will be fast enough.
439
459
440 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
460 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
441 number of changesets from c1 to base such that if number of changesets are
461 number of changesets from c1 to base such that if number of changesets are
442 more than the limit, full copytracing algorithm won't be used.
462 more than the limit, full copytracing algorithm won't be used.
443 """
463 """
444 if c1.rev() is None:
464 if c1.rev() is None:
445 c1 = c1.p1()
465 c1 = c1.p1()
446 if c1.mutable() and base.mutable():
466 if c1.mutable() and base.mutable():
447 sourcecommitlimit = repo.ui.configint('experimental',
467 sourcecommitlimit = repo.ui.configint('experimental',
448 'copytrace.sourcecommitlimit')
468 'copytrace.sourcecommitlimit')
449 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
469 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
450 return commits < sourcecommitlimit
470 return commits < sourcecommitlimit
451 return False
471 return False
452
472
453 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
473 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
454 copy, renamedelete):
474 copy, renamedelete):
455 if src not in m2:
475 if src not in m2:
456 # deleted on side 2
476 # deleted on side 2
457 if src not in m1:
477 if src not in m1:
458 # renamed on side 1, deleted on side 2
478 # renamed on side 1, deleted on side 2
459 renamedelete[src] = dsts1
479 renamedelete[src] = dsts1
460 elif m2[src] != mb[src]:
480 elif m2[src] != mb[src]:
461 if not _related(c2[src], base[src]):
481 if not _related(c2[src], base[src]):
462 return
482 return
463 # modified on side 2
483 # modified on side 2
464 for dst in dsts1:
484 for dst in dsts1:
465 if dst not in m2:
485 if dst not in m2:
466 # dst not added on side 2 (handle as regular
486 # dst not added on side 2 (handle as regular
467 # "both created" case in manifestmerge otherwise)
487 # "both created" case in manifestmerge otherwise)
468 copy[dst] = src
488 copy[dst] = src
469
489
470 def _fullcopytracing(repo, c1, c2, base):
490 def _fullcopytracing(repo, c1, c2, base):
471 """ The full copytracing algorithm which finds all the new files that were
491 """ The full copytracing algorithm which finds all the new files that were
472 added from merge base up to the top commit and for each file it checks if
492 added from merge base up to the top commit and for each file it checks if
473 this file was copied from another file.
493 this file was copied from another file.
474
494
475 This is pretty slow when a lot of changesets are involved but will track all
495 This is pretty slow when a lot of changesets are involved but will track all
476 the copies.
496 the copies.
477 """
497 """
478 m1 = c1.manifest()
498 m1 = c1.manifest()
479 m2 = c2.manifest()
499 m2 = c2.manifest()
480 mb = base.manifest()
500 mb = base.manifest()
481
501
482 copies1 = pathcopies(base, c1)
502 copies1 = pathcopies(base, c1)
483 copies2 = pathcopies(base, c2)
503 copies2 = pathcopies(base, c2)
484
504
485 inversecopies1 = {}
505 inversecopies1 = {}
486 inversecopies2 = {}
506 inversecopies2 = {}
487 for dst, src in copies1.items():
507 for dst, src in copies1.items():
488 inversecopies1.setdefault(src, []).append(dst)
508 inversecopies1.setdefault(src, []).append(dst)
489 for dst, src in copies2.items():
509 for dst, src in copies2.items():
490 inversecopies2.setdefault(src, []).append(dst)
510 inversecopies2.setdefault(src, []).append(dst)
491
511
492 copy = {}
512 copy = {}
493 diverge = {}
513 diverge = {}
494 renamedelete = {}
514 renamedelete = {}
495 allsources = set(inversecopies1) | set(inversecopies2)
515 allsources = set(inversecopies1) | set(inversecopies2)
496 for src in allsources:
516 for src in allsources:
497 dsts1 = inversecopies1.get(src)
517 dsts1 = inversecopies1.get(src)
498 dsts2 = inversecopies2.get(src)
518 dsts2 = inversecopies2.get(src)
499 if dsts1 and dsts2:
519 if dsts1 and dsts2:
500 # copied/renamed on both sides
520 # copied/renamed on both sides
501 if src not in m1 and src not in m2:
521 if src not in m1 and src not in m2:
502 # renamed on both sides
522 # renamed on both sides
503 dsts1 = set(dsts1)
523 dsts1 = set(dsts1)
504 dsts2 = set(dsts2)
524 dsts2 = set(dsts2)
505 # If there's some overlap in the rename destinations, we
525 # If there's some overlap in the rename destinations, we
506 # consider it not divergent. For example, if side 1 copies 'a'
526 # consider it not divergent. For example, if side 1 copies 'a'
507 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
527 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
508 # and 'd' and deletes 'a'; then 'c' is a copy and nothing diverges.
528 # and 'd' and deletes 'a'; then 'c' is a copy and nothing diverges.
509 if dsts1 & dsts2:
529 if dsts1 & dsts2:
510 for dst in (dsts1 & dsts2):
530 for dst in (dsts1 & dsts2):
511 copy[dst] = src
531 copy[dst] = src
512 else:
532 else:
513 diverge[src] = sorted(dsts1 | dsts2)
533 diverge[src] = sorted(dsts1 | dsts2)
514 elif src in m1 and src in m2:
534 elif src in m1 and src in m2:
515 # copied on both sides
535 # copied on both sides
516 dsts1 = set(dsts1)
536 dsts1 = set(dsts1)
517 dsts2 = set(dsts2)
537 dsts2 = set(dsts2)
518 for dst in (dsts1 & dsts2):
538 for dst in (dsts1 & dsts2):
519 copy[dst] = src
539 copy[dst] = src
520 # TODO: Handle cases where it was renamed on one side and copied
540 # TODO: Handle cases where it was renamed on one side and copied
521 # on the other side
541 # on the other side
522 elif dsts1:
542 elif dsts1:
523 # copied/renamed only on side 1
543 # copied/renamed only on side 1
524 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
544 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
525 copy, renamedelete)
545 copy, renamedelete)
526 elif dsts2:
546 elif dsts2:
527 # copied/renamed only on side 2
547 # copied/renamed only on side 2
528 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
548 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
529 copy, renamedelete)
549 copy, renamedelete)
530
550
531 renamedeleteset = set()
551 renamedeleteset = set()
532 divergeset = set()
552 divergeset = set()
533 for dsts in diverge.values():
553 for dsts in diverge.values():
534 divergeset.update(dsts)
554 divergeset.update(dsts)
535 for dsts in renamedelete.values():
555 for dsts in renamedelete.values():
536 renamedeleteset.update(dsts)
556 renamedeleteset.update(dsts)
537
557
538 # find interesting file sets from manifests
558 # find interesting file sets from manifests
539 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
559 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
540 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
560 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
541 u1 = sorted(addedinm1 - addedinm2)
561 u1 = sorted(addedinm1 - addedinm2)
542 u2 = sorted(addedinm2 - addedinm1)
562 u2 = sorted(addedinm2 - addedinm1)
543
563
544 header = " unmatched files in %s"
564 header = " unmatched files in %s"
545 if u1:
565 if u1:
546 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
566 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
547 if u2:
567 if u2:
548 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
568 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
549
569
550 fullcopy = copies1.copy()
570 fullcopy = copies1.copy()
551 fullcopy.update(copies2)
571 fullcopy.update(copies2)
552 if not fullcopy:
572 if not fullcopy:
553 return copy, {}, diverge, renamedelete, {}
573 return copy, {}, diverge, renamedelete, {}
554
574
555 if repo.ui.debugflag:
575 if repo.ui.debugflag:
556 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
576 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
557 "% = renamed and deleted):\n")
577 "% = renamed and deleted):\n")
558 for f in sorted(fullcopy):
578 for f in sorted(fullcopy):
559 note = ""
579 note = ""
560 if f in copy:
580 if f in copy:
561 note += "*"
581 note += "*"
562 if f in divergeset:
582 if f in divergeset:
563 note += "!"
583 note += "!"
564 if f in renamedeleteset:
584 if f in renamedeleteset:
565 note += "%"
585 note += "%"
566 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
586 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
567 note))
587 note))
568 del divergeset
588 del divergeset
569
589
570 repo.ui.debug(" checking for directory renames\n")
590 repo.ui.debug(" checking for directory renames\n")
571
591
572 # generate a directory move map
592 # generate a directory move map
573 d1, d2 = c1.dirs(), c2.dirs()
593 d1, d2 = c1.dirs(), c2.dirs()
574 # Hack for adding '', which is not otherwise added, to d1 and d2
594 # Hack for adding '', which is not otherwise added, to d1 and d2
575 d1.addpath('/')
595 d1.addpath('/')
576 d2.addpath('/')
596 d2.addpath('/')
577 invalid = set()
597 invalid = set()
578 dirmove = {}
598 dirmove = {}
579
599
580 # examine each file copy for a potential directory move, which is
600 # examine each file copy for a potential directory move, which is
581 # when all the files in a directory are moved to a new directory
601 # when all the files in a directory are moved to a new directory
582 for dst, src in fullcopy.iteritems():
602 for dst, src in fullcopy.iteritems():
583 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
584 if dsrc in invalid:
604 if dsrc in invalid:
585 # already seen to be uninteresting
605 # already seen to be uninteresting
586 continue
606 continue
587 elif dsrc in d1 and ddst in d1:
607 elif dsrc in d1 and ddst in d1:
588 # directory wasn't entirely moved locally
608 # directory wasn't entirely moved locally
589 invalid.add(dsrc)
609 invalid.add(dsrc)
590 elif dsrc in d2 and ddst in d2:
610 elif dsrc in d2 and ddst in d2:
591 # directory wasn't entirely moved remotely
611 # directory wasn't entirely moved remotely
592 invalid.add(dsrc)
612 invalid.add(dsrc)
593 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
594 # files from the same directory moved to two different places
614 # files from the same directory moved to two different places
595 invalid.add(dsrc)
615 invalid.add(dsrc)
596 else:
616 else:
597 # looks good so far
617 # looks good so far
598 dirmove[dsrc] = ddst
618 dirmove[dsrc] = ddst
599
619
600 for i in invalid:
620 for i in invalid:
601 if i in dirmove:
621 if i in dirmove:
602 del dirmove[i]
622 del dirmove[i]
603 del d1, d2, invalid
623 del d1, d2, invalid
604
624
605 if not dirmove:
625 if not dirmove:
606 return copy, {}, diverge, renamedelete, {}
626 return copy, {}, diverge, renamedelete, {}
607
627
608 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
609
629
610 for d in dirmove:
630 for d in dirmove:
611 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
612 (d, dirmove[d]))
632 (d, dirmove[d]))
613
633
614 movewithdir = {}
634 movewithdir = {}
615 # check unaccounted nonoverlapping files against directory moves
635 # check unaccounted nonoverlapping files against directory moves
616 for f in u1 + u2:
636 for f in u1 + u2:
617 if f not in fullcopy:
637 if f not in fullcopy:
618 for d in dirmove:
638 for d in dirmove:
619 if f.startswith(d):
639 if f.startswith(d):
620 # new file added in a directory that was moved, move it
640 # new file added in a directory that was moved, move it
621 df = dirmove[d] + f[len(d):]
641 df = dirmove[d] + f[len(d):]
622 if df not in copy:
642 if df not in copy:
623 movewithdir[f] = df
643 movewithdir[f] = df
624 repo.ui.debug((" pending file src: '%s' -> "
644 repo.ui.debug((" pending file src: '%s' -> "
625 "dst: '%s'\n") % (f, df))
645 "dst: '%s'\n") % (f, df))
626 break
646 break
627
647
628 return copy, movewithdir, diverge, renamedelete, dirmove
648 return copy, movewithdir, diverge, renamedelete, dirmove
629
649
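The directory-move detection near the end of _fullcopytracing() boils down to a small rule: a source directory maps to a destination directory only if no copied file contradicts the mapping and the directory did not survive on either side. A hedged toy version, with plain sets/dicts standing in for the dirs() structures and the pathcopies output (the '' / '/' root hack is ignored):

```python
import posixpath

def toy_dirmoves(fullcopy, dirs1, dirs2):
    invalid, dirmove = set(), {}
    for dst, src in fullcopy.items():
        dsrc, ddst = posixpath.dirname(src), posixpath.dirname(dst)
        if dsrc in invalid:
            continue
        if (dsrc in dirs1 and ddst in dirs1) or (dsrc in dirs2 and ddst in dirs2):
            invalid.add(dsrc)          # directory wasn't entirely moved on that side
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            invalid.add(dsrc)          # moved to two different places
        else:
            dirmove[dsrc] = ddst
    return {k: v for k, v in dirmove.items() if k not in invalid}

print(toy_dirmoves({'newdir/a': 'olddir/a', 'newdir/b': 'olddir/b'},
                   dirs1={'newdir'}, dirs2={'olddir'}))
# -> {'olddir': 'newdir'}
```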
630 def _heuristicscopytracing(repo, c1, c2, base):
650 def _heuristicscopytracing(repo, c1, c2, base):
631 """ Fast copytracing using filename heuristics
651 """ Fast copytracing using filename heuristics
632
652
633 Assumes that moves or renames are of following two types:
653 Assumes that moves or renames are of following two types:
634
654
635 1) Inside a directory only (same directory name but different filenames)
655 1) Inside a directory only (same directory name but different filenames)
636 2) Move from one directory to another
656 2) Move from one directory to another
637 (same filenames but different directory names)
657 (same filenames but different directory names)
638
658
639 Works only when there are no merge commits in the "source branch".
659 Works only when there are no merge commits in the "source branch".
640 Source branch is commits from base up to c2 not including base.
660 Source branch is commits from base up to c2 not including base.
641
661
642 If a merge is involved it falls back to _fullcopytracing().
662 If a merge is involved it falls back to _fullcopytracing().
643
663
644 Can be used by setting the following config:
664 Can be used by setting the following config:
645
665
646 [experimental]
666 [experimental]
647 copytrace = heuristics
667 copytrace = heuristics
648
668
649 In some cases the copy/move candidates found by heuristics can be very large
669 In some cases the copy/move candidates found by heuristics can be very large
650 in number and that will make the algorithm slow. The number of possible
670 in number and that will make the algorithm slow. The number of possible
651 candidates to check can be limited by using the config
671 candidates to check can be limited by using the config
652 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
653 """
673 """
654
674
655 if c1.rev() is None:
675 if c1.rev() is None:
656 c1 = c1.p1()
676 c1 = c1.p1()
657 if c2.rev() is None:
677 if c2.rev() is None:
658 c2 = c2.p1()
678 c2 = c2.p1()
659
679
660 copies = {}
680 copies = {}
661
681
662 changedfiles = set()
682 changedfiles = set()
663 m1 = c1.manifest()
683 m1 = c1.manifest()
664 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
665 # If base is not in c2 branch, we switch to fullcopytracing
685 # If base is not in c2 branch, we switch to fullcopytracing
666 repo.ui.debug("switching to full copytracing as base is not "
686 repo.ui.debug("switching to full copytracing as base is not "
667 "an ancestor of c2\n")
687 "an ancestor of c2\n")
668 return _fullcopytracing(repo, c1, c2, base)
688 return _fullcopytracing(repo, c1, c2, base)
669
689
670 ctx = c2
690 ctx = c2
671 while ctx != base:
691 while ctx != base:
672 if len(ctx.parents()) == 2:
692 if len(ctx.parents()) == 2:
673 # To keep things simple let's not handle merges
693 # To keep things simple let's not handle merges
674 repo.ui.debug("switching to full copytracing because of merges\n")
694 repo.ui.debug("switching to full copytracing because of merges\n")
675 return _fullcopytracing(repo, c1, c2, base)
695 return _fullcopytracing(repo, c1, c2, base)
676 changedfiles.update(ctx.files())
696 changedfiles.update(ctx.files())
677 ctx = ctx.p1()
697 ctx = ctx.p1()
678
698
679 cp = _forwardcopies(base, c2)
699 cp = _forwardcopies(base, c2)
680 for dst, src in cp.iteritems():
700 for dst, src in cp.iteritems():
681 if src in m1:
701 if src in m1:
682 copies[dst] = src
702 copies[dst] = src
683
703
684 # file is missing if it isn't present in the destination, but is present in
704 # file is missing if it isn't present in the destination, but is present in
685 # the base and present in the source.
705 # the base and present in the source.
686 # Presence in the base is important to exclude added files, presence in the
706 # Presence in the base is important to exclude added files, presence in the
687 # source is important to exclude removed files.
707 # source is important to exclude removed files.
688 filt = lambda f: f not in m1 and f in base and f in c2
708 filt = lambda f: f not in m1 and f in base and f in c2
689 missingfiles = [f for f in changedfiles if filt(f)]
709 missingfiles = [f for f in changedfiles if filt(f)]
690
710
691 if missingfiles:
711 if missingfiles:
692 basenametofilename = collections.defaultdict(list)
712 basenametofilename = collections.defaultdict(list)
693 dirnametofilename = collections.defaultdict(list)
713 dirnametofilename = collections.defaultdict(list)
694
714
695 for f in m1.filesnotin(base.manifest()):
715 for f in m1.filesnotin(base.manifest()):
696 basename = os.path.basename(f)
716 basename = os.path.basename(f)
697 dirname = os.path.dirname(f)
717 dirname = os.path.dirname(f)
698 basenametofilename[basename].append(f)
718 basenametofilename[basename].append(f)
699 dirnametofilename[dirname].append(f)
719 dirnametofilename[dirname].append(f)
700
720
701 for f in missingfiles:
721 for f in missingfiles:
702 basename = os.path.basename(f)
722 basename = os.path.basename(f)
703 dirname = os.path.dirname(f)
723 dirname = os.path.dirname(f)
704 samebasename = basenametofilename[basename]
724 samebasename = basenametofilename[basename]
705 samedirname = dirnametofilename[dirname]
725 samedirname = dirnametofilename[dirname]
706 movecandidates = samebasename + samedirname
726 movecandidates = samebasename + samedirname
707 # f is guaranteed to be present in c2, that's why
727 # f is guaranteed to be present in c2, that's why
708 # c2.filectx(f) won't fail
728 # c2.filectx(f) won't fail
709 f2 = c2.filectx(f)
729 f2 = c2.filectx(f)
710 # we can have a lot of candidates which can slow down the heuristics
730 # we can have a lot of candidates which can slow down the heuristics
711 # config value to limit the number of candidates moves to check
731 # config value to limit the number of candidates moves to check
712 maxcandidates = repo.ui.configint('experimental',
732 maxcandidates = repo.ui.configint('experimental',
713 'copytrace.movecandidateslimit')
733 'copytrace.movecandidateslimit')
714
734
715 if len(movecandidates) > maxcandidates:
735 if len(movecandidates) > maxcandidates:
716 repo.ui.status(_("skipping copytracing for '%s', more "
736 repo.ui.status(_("skipping copytracing for '%s', more "
717 "candidates than the limit: %d\n")
737 "candidates than the limit: %d\n")
718 % (f, len(movecandidates)))
738 % (f, len(movecandidates)))
719 continue
739 continue
720
740
721 for candidate in movecandidates:
741 for candidate in movecandidates:
722 f1 = c1.filectx(candidate)
742 f1 = c1.filectx(candidate)
723 if _related(f1, f2):
743 if _related(f1, f2):
724 # if there are a few related copies then we'll merge
744 # if there are a few related copies then we'll merge
725 # changes into all of them. This matches the behaviour
745 # changes into all of them. This matches the behaviour
726 # of upstream copytracing
746 # of upstream copytracing
727 copies[candidate] = f
747 copies[candidate] = f
728
748
729 return copies, {}, {}, {}, {}
749 return copies, {}, {}, {}, {}
730
750
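A hedged toy version of the candidate generation above: files added on the c1 side are indexed by basename and by dirname, and a file missing from c1 is matched against everything sharing either; a _related() check would then decide which candidates count.

```python
import collections
import os

def toy_candidates(added_in_c1, missing_file):
    bybase = collections.defaultdict(list)
    bydir = collections.defaultdict(list)
    for f in added_in_c1:
        bybase[os.path.basename(f)].append(f)
        bydir[os.path.dirname(f)].append(f)
    return (bybase[os.path.basename(missing_file)] +
            bydir[os.path.dirname(missing_file)])

print(toy_candidates(['newdir/a.txt', 'docs/readme'], 'olddir/a.txt'))
# -> ['newdir/a.txt']  (same basename, different directory)
```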
731 def _related(f1, f2):
751 def _related(f1, f2):
732 """return True if f1 and f2 filectx have a common ancestor
752 """return True if f1 and f2 filectx have a common ancestor
733
753
734 Walk back to common ancestor to see if the two files originate
754 Walk back to common ancestor to see if the two files originate
735 from the same file. Since workingfilectx's rev() is None it messes
755 from the same file. Since workingfilectx's rev() is None it messes
736 up the integer comparison logic, hence the pre-step check for
756 up the integer comparison logic, hence the pre-step check for
737 None (f1 and f2 can only be workingfilectx's initially).
757 None (f1 and f2 can only be workingfilectx's initially).
738 """
758 """
739
759
740 if f1 == f2:
760 if f1 == f2:
741 return True # a match
761 return True # a match
742
762
743 g1, g2 = f1.ancestors(), f2.ancestors()
763 g1, g2 = f1.ancestors(), f2.ancestors()
744 try:
764 try:
745 f1r, f2r = f1.linkrev(), f2.linkrev()
765 f1r, f2r = f1.linkrev(), f2.linkrev()
746
766
747 if f1r is None:
767 if f1r is None:
748 f1 = next(g1)
768 f1 = next(g1)
749 if f2r is None:
769 if f2r is None:
750 f2 = next(g2)
770 f2 = next(g2)
751
771
752 while True:
772 while True:
753 f1r, f2r = f1.linkrev(), f2.linkrev()
773 f1r, f2r = f1.linkrev(), f2.linkrev()
754 if f1r > f2r:
774 if f1r > f2r:
755 f1 = next(g1)
775 f1 = next(g1)
756 elif f2r > f1r:
776 elif f2r > f1r:
757 f2 = next(g2)
777 f2 = next(g2)
758 else: # f1 and f2 point to files in the same linkrev
778 else: # f1 and f2 point to files in the same linkrev
759 return f1 == f2 # true if they point to the same file
779 return f1 == f2 # true if they point to the same file
760 except StopIteration:
780 except StopIteration:
761 return False
781 return False
762
782
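A hedged restatement of the linkrev walk above: advance whichever side sits at the higher linkrev until both sides are at the same linkrev, then compare. The (linkrev, identity) tuples stand in for filectx objects.

```python
def toy_related(g1, g2):
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            if f1[0] > f2[0]:
                f1 = next(g1)
            elif f2[0] > f1[0]:
                f2 = next(g2)
            else:                      # same linkrev: same file?
                return f1[1] == f2[1]
    except StopIteration:
        return False

print(toy_related(iter([(5, 'x'), (3, 'x'), (1, 'a')]),
                  iter([(4, 'y'), (1, 'a')])))      # -> True, common origin 'a'
```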
763 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
764 """reproduce copies from fromrev to rev in the dirstate
784 """reproduce copies from fromrev to rev in the dirstate
765
785
766 If skiprev is specified, it's a revision that should be used to
786 If skiprev is specified, it's a revision that should be used to
767 filter copy records. Any copies that occur between fromrev and
787 filter copy records. Any copies that occur between fromrev and
768 skiprev will not be duplicated, even if they appear in the set of
788 skiprev will not be duplicated, even if they appear in the set of
769 copies between fromrev and rev.
789 copies between fromrev and rev.
770 """
790 """
771 exclude = {}
791 exclude = {}
772 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
773 bctrace = stringutil.parsebool(ctraceconfig)
793 bctrace = stringutil.parsebool(ctraceconfig)
774 if (skiprev is not None and
794 if (skiprev is not None and
775 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
776 # copytrace='off' skips this line, but not the entire function because
796 # copytrace='off' skips this line, but not the entire function because
777 # the line below is O(size of the repo) during a rebase, while the rest
797 # the line below is O(size of the repo) during a rebase, while the rest
778 # of the function is much faster (and is required for carrying copy
798 # of the function is much faster (and is required for carrying copy
779 # metadata across the rebase anyway).
799 # metadata across the rebase anyway).
780 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 exclude = pathcopies(repo[fromrev], repo[skiprev])
781 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
782 # copies.pathcopies returns backward renames, so dst might not
802 # copies.pathcopies returns backward renames, so dst might not
783 # actually be in the dirstate
803 # actually be in the dirstate
784 if dst in exclude:
804 if dst in exclude:
785 continue
805 continue
786 wctx[dst].markcopied(src)
806 wctx[dst].markcopied(src)
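Finally, a hypothetical sketch of how a rebase-like caller might use duplicatecopies() to carry copy metadata into the working copy; the locking and the choice of revisions here are illustrative assumptions only.

```python
from mercurial import hg, ui as uimod, copies as copiesmod

repo = hg.repository(uimod.ui.load(), '.')
wctx = repo[None]
target = repo['.']
with repo.wlock():
    # replay, into the dirstate, the copies 'target' made relative to its parent
    copiesmod.duplicatecopies(repo, wctx, target.rev(), target.p1().rev())
```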