copies: ignore heuristics copytracing when using changeset-centric algos...
Martin von Zweigbergk
r42412:a6be3af3 default
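
In short: mergecopies() now checks usechangesetcentricalgo() before consulting experimental.copytrace, so a repository configured to read copy metadata from changesets (experimental.copies.read-from set to 'changeset-only' or 'compatibility') always takes the full, changeset-centric copytracing path and the filename heuristics are skipped. The snippet below is a simplified, self-contained sketch of that dispatch order for illustration only; the helper name and string results are made up, and the real logic is in mergecopies() in the diff below.

# Illustration only: a toy restatement of the post-change dispatch order.
def _choosecopytracer(readfrom, copytrace, fullcopytraceable):
    # readfrom  -> value of experimental.copies.read-from
    # copytrace -> value of experimental.copytrace
    if readfrom in ('changeset-only', 'compatibility'):
        # changeset-centric algos requested: the heuristics don't make sense
        return 'full'
    if copytrace == 'heuristics':
        return 'full' if fullcopytraceable else 'heuristics'
    return 'full'

assert _choosecopytracer('changeset-only', 'heuristics', False) == 'full'
assert _choosecopytracer('compatibility', 'heuristics', False) == 'full'
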
@@ -1,782 +1,786 @@
# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import heapq
import os

from .i18n import _

from . import (
    match as matchmod,
    node,
    pathutil,
    util,
)
from .utils import (
    stringutil,
)

def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)

def _chain(src, dst, a, b):
    """chain two sets of copies a->b"""
    t = a.copy()
    for k, v in b.iteritems():
        if v in t:
            # found a chain
            if t[v] != k:
                # file wasn't renamed back to itself
                t[k] = t[v]
            if v not in dst:
                # chain was a rename, not a copy
                del t[v]
        if v in src:
            # file is a copy of an existing file
            t[k] = v

    for k, v in list(t.items()):
        # remove criss-crossed copies
        if k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

    return t

def _tracefile(fctx, am, limit=node.nullrev):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am, stopping after the first ancestor lower than limit"""

    for f in fctx.ancestors():
        if am.get(f.path(), None) == f.filenode():
            return f
        if limit >= 0 and not f.isintroducedafter(limit):
            return None

def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in 'anm' or (match and not match(k)):
            del c[k]
    return c

def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)

def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    return (repo.ui.config('experimental', 'copies.read-from') in
            ('changeset-only', 'compatibility'))

def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            if debug:
                dbg('debug.copies: rename of: %s\n' % ofctx._path)
            cm[f] = ofctx.path()
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm

def _changesetforwardcopies(a, b, match):
    if a.rev() == node.nullrev:
        return {}

    repo = a.repo()
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    while work:
        r, i1, copies1 = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            copies = {}
            ctx = repo[r]
            p1man, p2man = ctx.p1().manifest(), ctx.p2().manifest()
            allcopies = set(copies1) | set(copies2)
            # TODO: perhaps this filtering should be done as long as ctx
            # is merge, whether or not we're tracing from both parent.
            for dst in allcopies:
                if not match(dst):
                    continue
                if dst not in copies2:
                    # Copied on p1 side: mark as copy from p1 side if it didn't
                    # already exist on p2 side
                    if dst not in p2man:
                        copies[dst] = copies1[dst]
                elif dst not in copies1:
                    # Copied on p2 side: mark as copy from p2 side if it didn't
                    # already exist on p1 side
                    if dst not in p1man:
                        copies[dst] = copies2[dst]
                else:
                    # Copied on both sides: mark as copy from p1 side
                    copies[dst] = copies1[dst]
        else:
            copies = copies1
        if r == b.rev():
            return copies
        for c in children[r]:
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not match.always():
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            childcopies = _chain(a, childctx, copies, childcopies)
            heapq.heappush(work, (c, parent, childcopies))
    assert False

def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b._repo, match)

        cm = _committedforwardcopies(a, b.p1(), match)
        # combine copies from dirstate if necessary
        return _chain(a, b, cm, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, match)

def _backwardrenames(a, b, match):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.iteritems()):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        return {}
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        return _forwardcopies(x, y, match=match)
    if a == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        return _backwardrenames(x, y, match=match)
    if debug:
        repo.ui.debug('debug.copies: search mode: combined\n')
    return _chain(x, y, _backwardrenames(x, a, match=match),
                  _forwardcopies(a, y, match=match))

def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

+    if usechangesetcentricalgo(repo):
+        # The heuristics don't make sense when we need changeset-centric algos
+        return _fullcopytracing(repo, c1, c2, base)
+
    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)

def _isfullcopytraceable(repo, c1, base):
    """ Checks that if base, source and destination are all no-public branches,
    if yes let's use the full copytrace algorithm for increased capabilities
    since it will be fast enough.

    `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
    number of changesets from c1 to base such that if number of changesets are
    more than the limit, full copytracing algorithm won't be used.
    """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint('experimental',
                                              'copytrace.sourcecommitlimit')
        commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False

def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                           copy, renamedelete):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif m2[src] != mb[src]:
        if not _related(c2[src], base[src]):
            return
        # modified on side 2
        for dst in dsts1:
            if dst not in m2:
                # dst not added on side 2 (handle as regular
                # "both created" case in manifestmerge otherwise)
                copy[dst] = src

def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = " unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove

def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}

def _related(f1, f2):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).
    """

    if f1 == f2:
        return True # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            else: # f1 and f2 point to files in the same linkrev
                return f1 == f2 # true if they point to the same file
    except StopIteration:
        return False

def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    if (skiprev is not None and
        (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dst might not
        # actually be in the dirstate
        if dst in exclude:
            continue
        wctx[dst].markcopied(src)
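
A side note for readers of _chain() above: it composes two copy mappings, where 'a' maps destination to source across the first leg and 'b' across the second, collapsing rename chains and dropping entries whose targets no longer exist in the destination. Below is a rough, self-contained illustration of the rename-chain case; plain sets stand in for the real src/dst contexts and Python 3 dict iteration is used, so it mirrors, but is not, the implementation shown above.

# Illustration only: 'a' was renamed to 'b' in the first leg, then 'b' to 'c'
# in the second; chaining the two copy maps should yield {'c': 'a'}.
def chain(srcfiles, dstfiles, a, b):
    t = dict(a)
    for k, v in b.items():
        if v in t:
            if t[v] != k:           # not renamed back to itself
                t[k] = t[v]         # collapse the chain
            if v not in dstfiles:
                del t[v]            # intermediate name was a rename, drop it
        if v in srcfiles:
            t[k] = v                # direct copy of a still-existing file
    for k, v in list(t.items()):
        if k in srcfiles and v in dstfiles:
            del t[k]                # criss-crossed copy
        elif k not in dstfiles:
            del t[k]                # destination was later removed
    return t

assert chain({'a'}, {'c'}, {'b': 'a'}, {'c': 'b'}) == {'c': 'a'}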