##// END OF EJS Templates
copies: avoid reusing the same variable for two different copy dicts...
Martin von Zweigbergk -
r42714:e7c55e24 default
parent child Browse files
Show More
@@ -1,811 +1,811 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 # ancestor of a or b but not both, except when a or b is a direct descendant
31 # ancestor of a or b but not both, except when a or b is a direct descendant
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendant of b or vice versa.
107 # This only occurs when a is a descendant of b or vice versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
110 def _chainandfilter(src, dst, a, b):
110 def _chainandfilter(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b' and filter result"""
111 """chain two sets of copies 'a' and 'b' and filter result"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126 #
126 #
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # between 5 and 6, so it includes all cases in its result.
129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
130 # Cases 1, 3, and 5 are then removed by _filter().
131
131
132 t = _chain(a, b)
132 t = _chain(a, b)
133 _filter(src, dst, t)
133 _filter(src, dst, t)
134 return t
134 return t
135
135
136 def _filter(src, dst, t):
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
139 # remove copies from files that didn't exist
140 if v not in src:
140 if v not in src:
141 del t[k]
141 del t[k]
142 # remove criss-crossed copies
142 # remove criss-crossed copies
143 elif k in src and v in dst:
143 elif k in src and v in dst:
144 del t[k]
144 del t[k]
145 # remove copies to files that were then removed
145 # remove copies to files that were then removed
146 elif k not in dst:
146 elif k not in dst:
147 del t[k]
147 del t[k]
148
148
149 def _chain(a, b):
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
150 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
151 t = a.copy()
152 for k, v in b.iteritems():
152 for k, v in b.iteritems():
153 if v in t:
153 if v in t:
154 t[k] = t[v]
154 t[k] = t[v]
155 else:
155 else:
156 t[k] = v
156 t[k] = v
157 return t
157 return t
158
158
159 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
162
162
163 for f in fctx.ancestors():
163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
164 if am.get(f.path(), None) == f.filenode():
165 return f
165 return f
166 if not f.isintroducedafter(limit):
166 if not f.isintroducedafter(limit):
167 return None
167 return None
168
168
169 def _dirstatecopies(repo, match=None):
169 def _dirstatecopies(repo, match=None):
170 ds = repo.dirstate
170 ds = repo.dirstate
171 c = ds.copies().copy()
171 c = ds.copies().copy()
172 for k in list(c):
172 for k in list(c):
173 if ds[k] not in 'anm' or (match and not match(k)):
173 if ds[k] not in 'anm' or (match and not match(k)):
174 del c[k]
174 del c[k]
175 return c
175 return c
176
176
177 def _computeforwardmissing(a, b, match=None):
177 def _computeforwardmissing(a, b, match=None):
178 """Computes which files are in b but not a.
178 """Computes which files are in b but not a.
179 This is its own function so extensions can easily wrap this call to see what
179 This is its own function so extensions can easily wrap this call to see what
180 files _forwardcopies is about to process.
180 files _forwardcopies is about to process.
181 """
181 """
182 ma = a.manifest()
182 ma = a.manifest()
183 mb = b.manifest()
183 mb = b.manifest()
184 return mb.filesnotin(ma, match=match)
184 return mb.filesnotin(ma, match=match)
185
185
186 def usechangesetcentricalgo(repo):
186 def usechangesetcentricalgo(repo):
187 """Checks if we should use changeset-centric copy algorithms"""
187 """Checks if we should use changeset-centric copy algorithms"""
188 return (repo.ui.config('experimental', 'copies.read-from') in
188 return (repo.ui.config('experimental', 'copies.read-from') in
189 ('changeset-only', 'compatibility'))
189 ('changeset-only', 'compatibility'))
190
190
191 def _committedforwardcopies(a, b, match):
191 def _committedforwardcopies(a, b, match):
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 # files might have to be traced back to the fctx parent of the last
193 # files might have to be traced back to the fctx parent of the last
194 # one-side-only changeset, but not further back than that
194 # one-side-only changeset, but not further back than that
195 repo = a._repo
195 repo = a._repo
196
196
197 if usechangesetcentricalgo(repo):
197 if usechangesetcentricalgo(repo):
198 return _changesetforwardcopies(a, b, match)
198 return _changesetforwardcopies(a, b, match)
199
199
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 dbg = repo.ui.debug
201 dbg = repo.ui.debug
202 if debug:
202 if debug:
203 dbg('debug.copies: looking into rename from %s to %s\n'
203 dbg('debug.copies: looking into rename from %s to %s\n'
204 % (a, b))
204 % (a, b))
205 limit = _findlimit(repo, a, b)
205 limit = _findlimit(repo, a, b)
206 if debug:
206 if debug:
207 dbg('debug.copies: search limit: %d\n' % limit)
207 dbg('debug.copies: search limit: %d\n' % limit)
208 am = a.manifest()
208 am = a.manifest()
209
209
210 # find where new files came from
210 # find where new files came from
211 # we currently don't try to find where old files went, too expensive
211 # we currently don't try to find where old files went, too expensive
212 # this means we can miss a case like 'hg rm b; hg cp a b'
212 # this means we can miss a case like 'hg rm b; hg cp a b'
213 cm = {}
213 cm = {}
214
214
215 # Computing the forward missing is quite expensive on large manifests, since
215 # Computing the forward missing is quite expensive on large manifests, since
216 # it compares the entire manifests. We can optimize it in the common use
216 # it compares the entire manifests. We can optimize it in the common use
217 # case of computing what copies are in a commit versus its parent (like
217 # case of computing what copies are in a commit versus its parent (like
218 # during a rebase or histedit). Note, we exclude merge commits from this
218 # during a rebase or histedit). Note, we exclude merge commits from this
219 # optimization, since the ctx.files() for a merge commit is not correct for
219 # optimization, since the ctx.files() for a merge commit is not correct for
220 # this comparison.
220 # this comparison.
221 forwardmissingmatch = match
221 forwardmissingmatch = match
222 if b.p1() == a and b.p2().node() == node.nullid:
222 if b.p1() == a and b.p2().node() == node.nullid:
223 filesmatcher = matchmod.exact(b.files())
223 filesmatcher = matchmod.exact(b.files())
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226
226
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228
228
229 if debug:
229 if debug:
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231
231
232 for f in sorted(missing):
232 for f in sorted(missing):
233 if debug:
233 if debug:
234 dbg('debug.copies: tracing file: %s\n' % f)
234 dbg('debug.copies: tracing file: %s\n' % f)
235 fctx = b[f]
235 fctx = b[f]
236 fctx._ancestrycontext = ancestrycontext
236 fctx._ancestrycontext = ancestrycontext
237
237
238 if debug:
238 if debug:
239 start = util.timer()
239 start = util.timer()
240 ofctx = _tracefile(fctx, am, limit)
240 ofctx = _tracefile(fctx, am, limit)
241 if ofctx:
241 if ofctx:
242 if debug:
242 if debug:
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 cm[f] = ofctx.path()
244 cm[f] = ofctx.path()
245 if debug:
245 if debug:
246 dbg('debug.copies: time: %f seconds\n'
246 dbg('debug.copies: time: %f seconds\n'
247 % (util.timer() - start))
247 % (util.timer() - start))
248 return cm
248 return cm
249
249
250 def _changesetforwardcopies(a, b, match):
250 def _changesetforwardcopies(a, b, match):
251 if a.rev() == node.nullrev:
251 if a.rev() == node.nullrev:
252 return {}
252 return {}
253
253
254 repo = a.repo()
254 repo = a.repo()
255 children = {}
255 children = {}
256 cl = repo.changelog
256 cl = repo.changelog
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 for r in missingrevs:
258 for r in missingrevs:
259 for p in cl.parentrevs(r):
259 for p in cl.parentrevs(r):
260 if p == node.nullrev:
260 if p == node.nullrev:
261 continue
261 continue
262 if p not in children:
262 if p not in children:
263 children[p] = [r]
263 children[p] = [r]
264 else:
264 else:
265 children[p].append(r)
265 children[p].append(r)
266
266
267 roots = set(children) - set(missingrevs)
267 roots = set(children) - set(missingrevs)
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 # The parent number is only used for knowing which parent the copies dict
269 # The parent number is only used for knowing which parent the copies dict
270 # came from.
270 # came from.
271 work = [(r, 1, {}) for r in roots]
271 work = [(r, 1, {}) for r in roots]
272 heapq.heapify(work)
272 heapq.heapify(work)
273 alwaysmatch = match.always()
273 alwaysmatch = match.always()
274 while work:
274 while work:
275 r, i1, copies1 = heapq.heappop(work)
275 r, i1, copies1 = heapq.heappop(work)
276 if work and work[0][0] == r:
276 if work and work[0][0] == r:
277 # We are tracing copies from both parents
277 # We are tracing copies from both parents
278 r, i2, copies2 = heapq.heappop(work)
278 r, i2, copies2 = heapq.heappop(work)
279 copies = {}
279 copies = {}
280 allcopies = set(copies1) | set(copies2)
280 allcopies = set(copies1) | set(copies2)
281 # TODO: perhaps this filtering should be done as long as ctx
281 # TODO: perhaps this filtering should be done as long as ctx
282 # is merge, whether or not we're tracing from both parent.
282 # is merge, whether or not we're tracing from both parent.
283 for dst in allcopies:
283 for dst in allcopies:
284 if not alwaysmatch and not match(dst):
284 if not alwaysmatch and not match(dst):
285 continue
285 continue
286 # Unlike when copies are stored in the filelog, we consider
286 # Unlike when copies are stored in the filelog, we consider
287 # it a copy even if the destination already existed on the
287 # it a copy even if the destination already existed on the
288 # other branch. It's simply too expensive to check if the
288 # other branch. It's simply too expensive to check if the
289 # file existed in the manifest.
289 # file existed in the manifest.
290 if dst in copies1:
290 if dst in copies1:
291 # If it was copied on the p1 side, mark it as copied from
291 # If it was copied on the p1 side, mark it as copied from
292 # that side, even if it was also copied on the p2 side.
292 # that side, even if it was also copied on the p2 side.
293 copies[dst] = copies1[dst]
293 copies[dst] = copies1[dst]
294 else:
294 else:
295 copies[dst] = copies2[dst]
295 copies[dst] = copies2[dst]
296 else:
296 else:
297 copies = copies1
297 copies = copies1
298 if r == b.rev():
298 if r == b.rev():
299 _filter(a, b, copies)
299 _filter(a, b, copies)
300 return copies
300 return copies
301 for i, c in enumerate(children[r]):
301 for i, c in enumerate(children[r]):
302 childctx = repo[c]
302 childctx = repo[c]
303 if r == childctx.p1().rev():
303 if r == childctx.p1().rev():
304 parent = 1
304 parent = 1
305 childcopies = childctx.p1copies()
305 childcopies = childctx.p1copies()
306 else:
306 else:
307 assert r == childctx.p2().rev()
307 assert r == childctx.p2().rev()
308 parent = 2
308 parent = 2
309 childcopies = childctx.p2copies()
309 childcopies = childctx.p2copies()
310 if not alwaysmatch:
310 if not alwaysmatch:
311 childcopies = {dst: src for dst, src in childcopies.items()
311 childcopies = {dst: src for dst, src in childcopies.items()
312 if match(dst)}
312 if match(dst)}
313 # Copy the dict only if later iterations will also need it
313 # Copy the dict only if later iterations will also need it
314 if i != len(children[r]) - 1:
314 if i != len(children[r]) - 1:
315 copies = copies.copy()
315 newcopies = copies.copy()
316 if childcopies:
317 childcopies = _chain(copies, childcopies)
318 else:
316 else:
319 childcopies = copies
317 newcopies = copies
318 if childcopies:
319 newcopies = _chain(newcopies, childcopies)
320 for f in childctx.filesremoved():
320 for f in childctx.filesremoved():
321 if f in childcopies:
321 if f in newcopies:
322 del childcopies[f]
322 del newcopies[f]
323 heapq.heappush(work, (c, parent, childcopies))
323 heapq.heappush(work, (c, parent, newcopies))
324 assert False
324 assert False
325
325
326 def _forwardcopies(a, b, match=None):
326 def _forwardcopies(a, b, match=None):
327 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
327 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
328
328
329 match = a.repo().narrowmatch(match)
329 match = a.repo().narrowmatch(match)
330 # check for working copy
330 # check for working copy
331 if b.rev() is None:
331 if b.rev() is None:
332 if a == b.p1():
332 if a == b.p1():
333 # short-circuit to avoid issues with merge states
333 # short-circuit to avoid issues with merge states
334 return _dirstatecopies(b._repo, match)
334 return _dirstatecopies(b._repo, match)
335
335
336 cm = _committedforwardcopies(a, b.p1(), match)
336 cm = _committedforwardcopies(a, b.p1(), match)
337 # combine copies from dirstate if necessary
337 # combine copies from dirstate if necessary
338 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
338 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
339 return _committedforwardcopies(a, b, match)
339 return _committedforwardcopies(a, b, match)
340
340
341 def _backwardrenames(a, b, match):
341 def _backwardrenames(a, b, match):
342 if a._repo.ui.config('experimental', 'copytrace') == 'off':
342 if a._repo.ui.config('experimental', 'copytrace') == 'off':
343 return {}
343 return {}
344
344
345 # Even though we're not taking copies into account, 1:n rename situations
345 # Even though we're not taking copies into account, 1:n rename situations
346 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
346 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
347 # arbitrarily pick one of the renames.
347 # arbitrarily pick one of the renames.
348 # We don't want to pass in "match" here, since that would filter
348 # We don't want to pass in "match" here, since that would filter
349 # the destination by it. Since we're reversing the copies, we want
349 # the destination by it. Since we're reversing the copies, we want
350 # to filter the source instead.
350 # to filter the source instead.
351 f = _forwardcopies(b, a)
351 f = _forwardcopies(b, a)
352 r = {}
352 r = {}
353 for k, v in sorted(f.iteritems()):
353 for k, v in sorted(f.iteritems()):
354 if match and not match(v):
354 if match and not match(v):
355 continue
355 continue
356 # remove copies
356 # remove copies
357 if v in a:
357 if v in a:
358 continue
358 continue
359 r[v] = k
359 r[v] = k
360 return r
360 return r
361
361
362 def pathcopies(x, y, match=None):
362 def pathcopies(x, y, match=None):
363 """find {dst@y: src@x} copy mapping for directed compare"""
363 """find {dst@y: src@x} copy mapping for directed compare"""
364 repo = x._repo
364 repo = x._repo
365 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
365 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
366 if debug:
366 if debug:
367 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
367 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
368 % (x, y))
368 % (x, y))
369 if x == y or not x or not y:
369 if x == y or not x or not y:
370 return {}
370 return {}
371 a = y.ancestor(x)
371 a = y.ancestor(x)
372 if a == x:
372 if a == x:
373 if debug:
373 if debug:
374 repo.ui.debug('debug.copies: search mode: forward\n')
374 repo.ui.debug('debug.copies: search mode: forward\n')
375 return _forwardcopies(x, y, match=match)
375 return _forwardcopies(x, y, match=match)
376 if a == y:
376 if a == y:
377 if debug:
377 if debug:
378 repo.ui.debug('debug.copies: search mode: backward\n')
378 repo.ui.debug('debug.copies: search mode: backward\n')
379 return _backwardrenames(x, y, match=match)
379 return _backwardrenames(x, y, match=match)
380 if debug:
380 if debug:
381 repo.ui.debug('debug.copies: search mode: combined\n')
381 repo.ui.debug('debug.copies: search mode: combined\n')
382 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
382 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
383 _forwardcopies(a, y, match=match))
383 _forwardcopies(a, y, match=match))
384
384
385 def mergecopies(repo, c1, c2, base):
385 def mergecopies(repo, c1, c2, base):
386 """
386 """
387 Finds moves and copies between context c1 and c2 that are relevant for
387 Finds moves and copies between context c1 and c2 that are relevant for
388 merging. 'base' will be used as the merge base.
388 merging. 'base' will be used as the merge base.
389
389
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
391 files that were moved/ copied in one merge parent and modified in another.
391 files that were moved/ copied in one merge parent and modified in another.
392 For example:
392 For example:
393
393
394 o ---> 4 another commit
394 o ---> 4 another commit
395 |
395 |
396 | o ---> 3 commit that modifies a.txt
396 | o ---> 3 commit that modifies a.txt
397 | /
397 | /
398 o / ---> 2 commit that moves a.txt to b.txt
398 o / ---> 2 commit that moves a.txt to b.txt
399 |/
399 |/
400 o ---> 1 merge base
400 o ---> 1 merge base
401
401
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
403 revision 4, and if the user has copytrace disabled, we print the following
403 revision 4, and if the user has copytrace disabled, we print the following
404 message:
404 message:
405
405
406 ```other changed <file> which local deleted```
406 ```other changed <file> which local deleted```
407
407
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
409 "dirmove".
409 "dirmove".
410
410
411 "copy" is a mapping from destination name -> source name,
411 "copy" is a mapping from destination name -> source name,
412 where source is in c1 and destination is in c2 or vice-versa.
412 where source is in c1 and destination is in c2 or vice-versa.
413
413
414 "movewithdir" is a mapping from source name -> destination name,
414 "movewithdir" is a mapping from source name -> destination name,
415 where the file at source present in one context but not the other
415 where the file at source present in one context but not the other
416 needs to be moved to destination by the merge process, because the
416 needs to be moved to destination by the merge process, because the
417 other context moved the directory it is in.
417 other context moved the directory it is in.
418
418
419 "diverge" is a mapping of source name -> list of destination names
419 "diverge" is a mapping of source name -> list of destination names
420 for divergent renames.
420 for divergent renames.
421
421
422 "renamedelete" is a mapping of source name -> list of destination
422 "renamedelete" is a mapping of source name -> list of destination
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
424
424
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
426 This is needed for handling changes to new files previously grafted into
426 This is needed for handling changes to new files previously grafted into
427 renamed directories.
427 renamed directories.
428
428
429 This function calls different copytracing algorithms based on config.
429 This function calls different copytracing algorithms based on config.
430 """
430 """
431 # avoid silly behavior for update from empty dir
431 # avoid silly behavior for update from empty dir
432 if not c1 or not c2 or c1 == c2:
432 if not c1 or not c2 or c1 == c2:
433 return {}, {}, {}, {}, {}
433 return {}, {}, {}, {}, {}
434
434
435 narrowmatch = c1.repo().narrowmatch()
435 narrowmatch = c1.repo().narrowmatch()
436
436
437 # avoid silly behavior for parent -> working dir
437 # avoid silly behavior for parent -> working dir
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
440
440
441 copytracing = repo.ui.config('experimental', 'copytrace')
441 copytracing = repo.ui.config('experimental', 'copytrace')
442 if stringutil.parsebool(copytracing) is False:
442 if stringutil.parsebool(copytracing) is False:
443 # stringutil.parsebool() returns None when it is unable to parse the
443 # stringutil.parsebool() returns None when it is unable to parse the
444 # value, so we should rely on making sure copytracing is on such cases
444 # value, so we should rely on making sure copytracing is on such cases
445 return {}, {}, {}, {}, {}
445 return {}, {}, {}, {}, {}
446
446
447 if usechangesetcentricalgo(repo):
447 if usechangesetcentricalgo(repo):
448 # The heuristics don't make sense when we need changeset-centric algos
448 # The heuristics don't make sense when we need changeset-centric algos
449 return _fullcopytracing(repo, c1, c2, base)
449 return _fullcopytracing(repo, c1, c2, base)
450
450
451 # Copy trace disabling is explicitly below the node == p1 logic above
451 # Copy trace disabling is explicitly below the node == p1 logic above
452 # because the logic above is required for a simple copy to be kept across a
452 # because the logic above is required for a simple copy to be kept across a
453 # rebase.
453 # rebase.
454 if copytracing == 'heuristics':
454 if copytracing == 'heuristics':
455 # Do full copytracing if only non-public revisions are involved as
455 # Do full copytracing if only non-public revisions are involved as
456 # that will be fast enough and will also cover the copies which could
456 # that will be fast enough and will also cover the copies which could
457 # be missed by heuristics
457 # be missed by heuristics
458 if _isfullcopytraceable(repo, c1, base):
458 if _isfullcopytraceable(repo, c1, base):
459 return _fullcopytracing(repo, c1, c2, base)
459 return _fullcopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
461 else:
461 else:
462 return _fullcopytracing(repo, c1, c2, base)
462 return _fullcopytracing(repo, c1, c2, base)
463
463
464 def _isfullcopytraceable(repo, c1, base):
464 def _isfullcopytraceable(repo, c1, base):
465 """ Checks whether base, source and destination are all non-public branches;
465 """ Checks whether base, source and destination are all non-public branches;
466 if yes let's use the full copytrace algorithm for increased capabilities
466 if yes let's use the full copytrace algorithm for increased capabilities
467 since it will be fast enough.
467 since it will be fast enough.
468
468
469 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
469 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 number of changesets from c1 to base such that if number of changesets are
470 number of changesets from c1 to base such that if number of changesets are
471 more than the limit, full copytracing algorithm won't be used.
471 more than the limit, full copytracing algorithm won't be used.
472 """
472 """
473 if c1.rev() is None:
473 if c1.rev() is None:
474 c1 = c1.p1()
474 c1 = c1.p1()
475 if c1.mutable() and base.mutable():
475 if c1.mutable() and base.mutable():
476 sourcecommitlimit = repo.ui.configint('experimental',
476 sourcecommitlimit = repo.ui.configint('experimental',
477 'copytrace.sourcecommitlimit')
477 'copytrace.sourcecommitlimit')
478 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
478 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
479 return commits < sourcecommitlimit
479 return commits < sourcecommitlimit
480 return False
480 return False
481
481
482 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
482 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
483 copy, renamedelete):
483 copy, renamedelete):
484 if src not in m2:
484 if src not in m2:
485 # deleted on side 2
485 # deleted on side 2
486 if src not in m1:
486 if src not in m1:
487 # renamed on side 1, deleted on side 2
487 # renamed on side 1, deleted on side 2
488 renamedelete[src] = dsts1
488 renamedelete[src] = dsts1
489 elif m2[src] != mb[src]:
489 elif m2[src] != mb[src]:
490 if not _related(c2[src], base[src]):
490 if not _related(c2[src], base[src]):
491 return
491 return
492 # modified on side 2
492 # modified on side 2
493 for dst in dsts1:
493 for dst in dsts1:
494 if dst not in m2:
494 if dst not in m2:
495 # dst not added on side 2 (handle as regular
495 # dst not added on side 2 (handle as regular
496 # "both created" case in manifestmerge otherwise)
496 # "both created" case in manifestmerge otherwise)
497 copy[dst] = src
497 copy[dst] = src
498
498
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple of dicts (copy, movewithdir, diverge, renamedelete,
    dirmove):
      copy:         copied-file destination -> source
      movewithdir:  new file -> implied destination, for files added inside
                    a directory that was entirely moved
      diverge:      source -> sorted destinations, when renamed to different
                    names on the two sides
      renamedelete: source renamed on one side, deleted on the other
      dirmove:      moved-directory source path -> destination path (both
                    with trailing slashes)
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # dst -> src maps for each side, computed from the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to src -> [dst, ...] so each copy source is handled once
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # flattened destination sets, used only for the debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    # union of copies from both sides; side-2 entries win on dst collisions
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("   discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                        break

    return copy, movewithdir, diverge, renamedelete, dirmove
655
655
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple as _fullcopytracing(); only the first (copies)
    dict is ever populated by the heuristics themselves.
    """

    # working directory contexts have rev() of None; use the first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base and c2 (exclusive of base)
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source still exists on side 1
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [f for f in changedfiles
                    if f not in m1 and f in base and f in c2]

    if missingfiles:
        # index the files added on side 1 by basename and by dirname, so
        # candidates matching either heuristic are found in O(1)
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (read once here: it is invariant across the loop below)
        maxcandidates = repo.ui.configint('experimental',
                                          'copytrace.movecandidateslimit')

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
756
756
757 def _related(f1, f2):
757 def _related(f1, f2):
758 """return True if f1 and f2 filectx have a common ancestor
758 """return True if f1 and f2 filectx have a common ancestor
759
759
760 Walk back to common ancestor to see if the two files originate
760 Walk back to common ancestor to see if the two files originate
761 from the same file. Since workingfilectx's rev() is None it messes
761 from the same file. Since workingfilectx's rev() is None it messes
762 up the integer comparison logic, hence the pre-step check for
762 up the integer comparison logic, hence the pre-step check for
763 None (f1 and f2 can only be workingfilectx's initially).
763 None (f1 and f2 can only be workingfilectx's initially).
764 """
764 """
765
765
766 if f1 == f2:
766 if f1 == f2:
767 return True # a match
767 return True # a match
768
768
769 g1, g2 = f1.ancestors(), f2.ancestors()
769 g1, g2 = f1.ancestors(), f2.ancestors()
770 try:
770 try:
771 f1r, f2r = f1.linkrev(), f2.linkrev()
771 f1r, f2r = f1.linkrev(), f2.linkrev()
772
772
773 if f1r is None:
773 if f1r is None:
774 f1 = next(g1)
774 f1 = next(g1)
775 if f2r is None:
775 if f2r is None:
776 f2 = next(g2)
776 f2 = next(g2)
777
777
778 while True:
778 while True:
779 f1r, f2r = f1.linkrev(), f2.linkrev()
779 f1r, f2r = f1.linkrev(), f2.linkrev()
780 if f1r > f2r:
780 if f1r > f2r:
781 f1 = next(g1)
781 f1 = next(g1)
782 elif f2r > f1r:
782 elif f2r > f1r:
783 f2 = next(g2)
783 f2 = next(g2)
784 else: # f1 and f2 point to files in the same linkrev
784 else: # f1 and f2 point to files in the same linkrev
785 return f1 == f2 # true if they point to the same file
785 return f1 == f2 # true if they point to the same file
786 except StopIteration:
786 except StopIteration:
787 return False
787 return False
788
788
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    copytraceon = (ctraceconfig == 'heuristics'
                   or bctrace
                   or bctrace is None)
    exclude = {}
    if skiprev is not None and copytraceon:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now