##// END OF EJS Templates
copies: avoid unnecessary copying of copy dict...
Martin von Zweigbergk -
r42687:5ceb9113 default
parent child Browse files
Show More
@@ -1,804 +1,810 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendent
31 ancestor of a or b but not both, except when a or b is a direct descendent
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendent of b or visa-versa.
107 # This only occurs when a is a descendent of b or visa-versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
110 def _chainandfilter(src, dst, a, b):
110 def _chainandfilter(src, dst, a, b):
111 """chain two sets of copies 'a' and 'b' and filter result"""
111 """chain two sets of copies 'a' and 'b' and filter result"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
115 # following table (not including trivial cases). For example, case 2 is
115 # following table (not including trivial cases). For example, case 2 is
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126 #
126 #
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # between 5 and 6, so it includes all cases in its result.
129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
130 # Cases 1, 3, and 5 are then removed by _filter().
131
131
132 t = _chain(a, b)
132 t = _chain(a, b)
133 _filter(src, dst, t)
133 _filter(src, dst, t)
134 return t
134 return t
135
135
136 def _filter(src, dst, t):
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
138 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
139 # remove copies from files that didn't exist
140 if v not in src:
140 if v not in src:
141 del t[k]
141 del t[k]
142 # remove criss-crossed copies
142 # remove criss-crossed copies
143 elif k in src and v in dst:
143 elif k in src and v in dst:
144 del t[k]
144 del t[k]
145 # remove copies to files that were then removed
145 # remove copies to files that were then removed
146 elif k not in dst:
146 elif k not in dst:
147 del t[k]
147 del t[k]
148
148
149 def _chain(a, b):
149 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
150 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
151 t = a.copy()
152 for k, v in b.iteritems():
152 for k, v in b.iteritems():
153 if v in t:
153 if v in t:
154 t[k] = t[v]
154 t[k] = t[v]
155 else:
155 else:
156 t[k] = v
156 t[k] = v
157 return t
157 return t
158
158
159 def _tracefile(fctx, am, limit):
159 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
160 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
161 manifest am, stopping after the first ancestor lower than limit"""
162
162
163 for f in fctx.ancestors():
163 for f in fctx.ancestors():
164 if am.get(f.path(), None) == f.filenode():
164 if am.get(f.path(), None) == f.filenode():
165 return f
165 return f
166 if not f.isintroducedafter(limit):
166 if not f.isintroducedafter(limit):
167 return None
167 return None
168
168
169 def _dirstatecopies(repo, match=None):
169 def _dirstatecopies(repo, match=None):
170 ds = repo.dirstate
170 ds = repo.dirstate
171 c = ds.copies().copy()
171 c = ds.copies().copy()
172 for k in list(c):
172 for k in list(c):
173 if ds[k] not in 'anm' or (match and not match(k)):
173 if ds[k] not in 'anm' or (match and not match(k)):
174 del c[k]
174 del c[k]
175 return c
175 return c
176
176
177 def _computeforwardmissing(a, b, match=None):
177 def _computeforwardmissing(a, b, match=None):
178 """Computes which files are in b but not a.
178 """Computes which files are in b but not a.
179 This is its own function so extensions can easily wrap this call to see what
179 This is its own function so extensions can easily wrap this call to see what
180 files _forwardcopies is about to process.
180 files _forwardcopies is about to process.
181 """
181 """
182 ma = a.manifest()
182 ma = a.manifest()
183 mb = b.manifest()
183 mb = b.manifest()
184 return mb.filesnotin(ma, match=match)
184 return mb.filesnotin(ma, match=match)
185
185
186 def usechangesetcentricalgo(repo):
186 def usechangesetcentricalgo(repo):
187 """Checks if we should use changeset-centric copy algorithms"""
187 """Checks if we should use changeset-centric copy algorithms"""
188 return (repo.ui.config('experimental', 'copies.read-from') in
188 return (repo.ui.config('experimental', 'copies.read-from') in
189 ('changeset-only', 'compatibility'))
189 ('changeset-only', 'compatibility'))
190
190
191 def _committedforwardcopies(a, b, match):
191 def _committedforwardcopies(a, b, match):
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
192 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
193 # files might have to be traced back to the fctx parent of the last
193 # files might have to be traced back to the fctx parent of the last
194 # one-side-only changeset, but not further back than that
194 # one-side-only changeset, but not further back than that
195 repo = a._repo
195 repo = a._repo
196
196
197 if usechangesetcentricalgo(repo):
197 if usechangesetcentricalgo(repo):
198 return _changesetforwardcopies(a, b, match)
198 return _changesetforwardcopies(a, b, match)
199
199
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
200 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
201 dbg = repo.ui.debug
201 dbg = repo.ui.debug
202 if debug:
202 if debug:
203 dbg('debug.copies: looking into rename from %s to %s\n'
203 dbg('debug.copies: looking into rename from %s to %s\n'
204 % (a, b))
204 % (a, b))
205 limit = _findlimit(repo, a, b)
205 limit = _findlimit(repo, a, b)
206 if debug:
206 if debug:
207 dbg('debug.copies: search limit: %d\n' % limit)
207 dbg('debug.copies: search limit: %d\n' % limit)
208 am = a.manifest()
208 am = a.manifest()
209
209
210 # find where new files came from
210 # find where new files came from
211 # we currently don't try to find where old files went, too expensive
211 # we currently don't try to find where old files went, too expensive
212 # this means we can miss a case like 'hg rm b; hg cp a b'
212 # this means we can miss a case like 'hg rm b; hg cp a b'
213 cm = {}
213 cm = {}
214
214
215 # Computing the forward missing is quite expensive on large manifests, since
215 # Computing the forward missing is quite expensive on large manifests, since
216 # it compares the entire manifests. We can optimize it in the common use
216 # it compares the entire manifests. We can optimize it in the common use
217 # case of computing what copies are in a commit versus its parent (like
217 # case of computing what copies are in a commit versus its parent (like
218 # during a rebase or histedit). Note, we exclude merge commits from this
218 # during a rebase or histedit). Note, we exclude merge commits from this
219 # optimization, since the ctx.files() for a merge commit is not correct for
219 # optimization, since the ctx.files() for a merge commit is not correct for
220 # this comparison.
220 # this comparison.
221 forwardmissingmatch = match
221 forwardmissingmatch = match
222 if b.p1() == a and b.p2().node() == node.nullid:
222 if b.p1() == a and b.p2().node() == node.nullid:
223 filesmatcher = matchmod.exact(b.files())
223 filesmatcher = matchmod.exact(b.files())
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
224 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
225 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
226
226
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
227 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
228
228
229 if debug:
229 if debug:
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
230 dbg('debug.copies: missing files to search: %d\n' % len(missing))
231
231
232 for f in sorted(missing):
232 for f in sorted(missing):
233 if debug:
233 if debug:
234 dbg('debug.copies: tracing file: %s\n' % f)
234 dbg('debug.copies: tracing file: %s\n' % f)
235 fctx = b[f]
235 fctx = b[f]
236 fctx._ancestrycontext = ancestrycontext
236 fctx._ancestrycontext = ancestrycontext
237
237
238 if debug:
238 if debug:
239 start = util.timer()
239 start = util.timer()
240 ofctx = _tracefile(fctx, am, limit)
240 ofctx = _tracefile(fctx, am, limit)
241 if ofctx:
241 if ofctx:
242 if debug:
242 if debug:
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
243 dbg('debug.copies: rename of: %s\n' % ofctx._path)
244 cm[f] = ofctx.path()
244 cm[f] = ofctx.path()
245 if debug:
245 if debug:
246 dbg('debug.copies: time: %f seconds\n'
246 dbg('debug.copies: time: %f seconds\n'
247 % (util.timer() - start))
247 % (util.timer() - start))
248 return cm
248 return cm
249
249
250 def _changesetforwardcopies(a, b, match):
250 def _changesetforwardcopies(a, b, match):
251 if a.rev() == node.nullrev:
251 if a.rev() == node.nullrev:
252 return {}
252 return {}
253
253
254 repo = a.repo()
254 repo = a.repo()
255 children = {}
255 children = {}
256 cl = repo.changelog
256 cl = repo.changelog
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
257 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
258 for r in missingrevs:
258 for r in missingrevs:
259 for p in cl.parentrevs(r):
259 for p in cl.parentrevs(r):
260 if p == node.nullrev:
260 if p == node.nullrev:
261 continue
261 continue
262 if p not in children:
262 if p not in children:
263 children[p] = [r]
263 children[p] = [r]
264 else:
264 else:
265 children[p].append(r)
265 children[p].append(r)
266
266
267 roots = set(children) - set(missingrevs)
267 roots = set(children) - set(missingrevs)
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
268 # 'work' contains 3-tuples of a (revision number, parent number, copies).
269 # The parent number is only used for knowing which parent the copies dict
269 # The parent number is only used for knowing which parent the copies dict
270 # came from.
270 # came from.
271 work = [(r, 1, {}) for r in roots]
271 work = [(r, 1, {}) for r in roots]
272 heapq.heapify(work)
272 heapq.heapify(work)
273 while work:
273 while work:
274 r, i1, copies1 = heapq.heappop(work)
274 r, i1, copies1 = heapq.heappop(work)
275 if work and work[0][0] == r:
275 if work and work[0][0] == r:
276 # We are tracing copies from both parents
276 # We are tracing copies from both parents
277 r, i2, copies2 = heapq.heappop(work)
277 r, i2, copies2 = heapq.heappop(work)
278 copies = {}
278 copies = {}
279 allcopies = set(copies1) | set(copies2)
279 allcopies = set(copies1) | set(copies2)
280 # TODO: perhaps this filtering should be done as long as ctx
280 # TODO: perhaps this filtering should be done as long as ctx
281 # is merge, whether or not we're tracing from both parent.
281 # is merge, whether or not we're tracing from both parent.
282 for dst in allcopies:
282 for dst in allcopies:
283 if not match(dst):
283 if not match(dst):
284 continue
284 continue
285 # Unlike when copies are stored in the filelog, we consider
285 # Unlike when copies are stored in the filelog, we consider
286 # it a copy even if the destination already existed on the
286 # it a copy even if the destination already existed on the
287 # other branch. It's simply too expensive to check if the
287 # other branch. It's simply too expensive to check if the
288 # file existed in the manifest.
288 # file existed in the manifest.
289 if dst in copies1:
289 if dst in copies1:
290 # If it was copied on the p1 side, mark it as copied from
290 # If it was copied on the p1 side, mark it as copied from
291 # that side, even if it was also copied on the p2 side.
291 # that side, even if it was also copied on the p2 side.
292 copies[dst] = copies1[dst]
292 copies[dst] = copies1[dst]
293 else:
293 else:
294 copies[dst] = copies2[dst]
294 copies[dst] = copies2[dst]
295 else:
295 else:
296 copies = copies1
296 copies = copies1
297 if r == b.rev():
297 if r == b.rev():
298 _filter(a, b, copies)
298 _filter(a, b, copies)
299 return copies
299 return copies
300 for c in children[r]:
300 for i, c in enumerate(children[r]):
301 childctx = repo[c]
301 childctx = repo[c]
302 if r == childctx.p1().rev():
302 if r == childctx.p1().rev():
303 parent = 1
303 parent = 1
304 childcopies = childctx.p1copies()
304 childcopies = childctx.p1copies()
305 else:
305 else:
306 assert r == childctx.p2().rev()
306 assert r == childctx.p2().rev()
307 parent = 2
307 parent = 2
308 childcopies = childctx.p2copies()
308 childcopies = childctx.p2copies()
309 if not match.always():
309 if not match.always():
310 childcopies = {dst: src for dst, src in childcopies.items()
310 childcopies = {dst: src for dst, src in childcopies.items()
311 if match(dst)}
311 if match(dst)}
312 childcopies = _chain(copies, childcopies)
312 # Copy the dict only if later iterations will also need it
313 if i != len(children[r]) - 1:
314 copies = copies.copy()
315 if childcopies:
316 childcopies = _chain(copies, childcopies)
317 else:
318 childcopies = copies
313 for f in childctx.filesremoved():
319 for f in childctx.filesremoved():
314 if f in childcopies:
320 if f in childcopies:
315 del childcopies[f]
321 del childcopies[f]
316 heapq.heappush(work, (c, parent, childcopies))
322 heapq.heappush(work, (c, parent, childcopies))
317 assert False
323 assert False
318
324
319 def _forwardcopies(a, b, match=None):
325 def _forwardcopies(a, b, match=None):
320 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
326 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
321
327
322 match = a.repo().narrowmatch(match)
328 match = a.repo().narrowmatch(match)
323 # check for working copy
329 # check for working copy
324 if b.rev() is None:
330 if b.rev() is None:
325 if a == b.p1():
331 if a == b.p1():
326 # short-circuit to avoid issues with merge states
332 # short-circuit to avoid issues with merge states
327 return _dirstatecopies(b._repo, match)
333 return _dirstatecopies(b._repo, match)
328
334
329 cm = _committedforwardcopies(a, b.p1(), match)
335 cm = _committedforwardcopies(a, b.p1(), match)
330 # combine copies from dirstate if necessary
336 # combine copies from dirstate if necessary
331 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
337 return _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
332 return _committedforwardcopies(a, b, match)
338 return _committedforwardcopies(a, b, match)
333
339
334 def _backwardrenames(a, b, match):
340 def _backwardrenames(a, b, match):
335 if a._repo.ui.config('experimental', 'copytrace') == 'off':
341 if a._repo.ui.config('experimental', 'copytrace') == 'off':
336 return {}
342 return {}
337
343
338 # Even though we're not taking copies into account, 1:n rename situations
344 # Even though we're not taking copies into account, 1:n rename situations
339 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
345 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
340 # arbitrarily pick one of the renames.
346 # arbitrarily pick one of the renames.
341 # We don't want to pass in "match" here, since that would filter
347 # We don't want to pass in "match" here, since that would filter
342 # the destination by it. Since we're reversing the copies, we want
348 # the destination by it. Since we're reversing the copies, we want
343 # to filter the source instead.
349 # to filter the source instead.
344 f = _forwardcopies(b, a)
350 f = _forwardcopies(b, a)
345 r = {}
351 r = {}
346 for k, v in sorted(f.iteritems()):
352 for k, v in sorted(f.iteritems()):
347 if match and not match(v):
353 if match and not match(v):
348 continue
354 continue
349 # remove copies
355 # remove copies
350 if v in a:
356 if v in a:
351 continue
357 continue
352 r[v] = k
358 r[v] = k
353 return r
359 return r
354
360
355 def pathcopies(x, y, match=None):
361 def pathcopies(x, y, match=None):
356 """find {dst@y: src@x} copy mapping for directed compare"""
362 """find {dst@y: src@x} copy mapping for directed compare"""
357 repo = x._repo
363 repo = x._repo
358 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
364 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
359 if debug:
365 if debug:
360 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
366 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
361 % (x, y))
367 % (x, y))
362 if x == y or not x or not y:
368 if x == y or not x or not y:
363 return {}
369 return {}
364 a = y.ancestor(x)
370 a = y.ancestor(x)
365 if a == x:
371 if a == x:
366 if debug:
372 if debug:
367 repo.ui.debug('debug.copies: search mode: forward\n')
373 repo.ui.debug('debug.copies: search mode: forward\n')
368 return _forwardcopies(x, y, match=match)
374 return _forwardcopies(x, y, match=match)
369 if a == y:
375 if a == y:
370 if debug:
376 if debug:
371 repo.ui.debug('debug.copies: search mode: backward\n')
377 repo.ui.debug('debug.copies: search mode: backward\n')
372 return _backwardrenames(x, y, match=match)
378 return _backwardrenames(x, y, match=match)
373 if debug:
379 if debug:
374 repo.ui.debug('debug.copies: search mode: combined\n')
380 repo.ui.debug('debug.copies: search mode: combined\n')
375 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
381 return _chainandfilter(x, y, _backwardrenames(x, a, match=match),
376 _forwardcopies(a, y, match=match))
382 _forwardcopies(a, y, match=match))
377
383
378 def mergecopies(repo, c1, c2, base):
384 def mergecopies(repo, c1, c2, base):
379 """
385 """
380 Finds moves and copies between context c1 and c2 that are relevant for
386 Finds moves and copies between context c1 and c2 that are relevant for
381 merging. 'base' will be used as the merge base.
387 merging. 'base' will be used as the merge base.
382
388
383 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
389 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
384 files that were moved/ copied in one merge parent and modified in another.
390 files that were moved/ copied in one merge parent and modified in another.
385 For example:
391 For example:
386
392
387 o ---> 4 another commit
393 o ---> 4 another commit
388 |
394 |
389 | o ---> 3 commit that modifies a.txt
395 | o ---> 3 commit that modifies a.txt
390 | /
396 | /
391 o / ---> 2 commit that moves a.txt to b.txt
397 o / ---> 2 commit that moves a.txt to b.txt
392 |/
398 |/
393 o ---> 1 merge base
399 o ---> 1 merge base
394
400
395 If we try to rebase revision 3 on revision 4, since there is no a.txt in
401 If we try to rebase revision 3 on revision 4, since there is no a.txt in
396 revision 4, and if user have copytrace disabled, we prints the following
402 revision 4, and if user have copytrace disabled, we prints the following
397 message:
403 message:
398
404
399 ```other changed <file> which local deleted```
405 ```other changed <file> which local deleted```
400
406
401 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
407 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
402 "dirmove".
408 "dirmove".
403
409
404 "copy" is a mapping from destination name -> source name,
410 "copy" is a mapping from destination name -> source name,
405 where source is in c1 and destination is in c2 or vice-versa.
411 where source is in c1 and destination is in c2 or vice-versa.
406
412
407 "movewithdir" is a mapping from source name -> destination name,
413 "movewithdir" is a mapping from source name -> destination name,
408 where the file at source present in one context but not the other
414 where the file at source present in one context but not the other
409 needs to be moved to destination by the merge process, because the
415 needs to be moved to destination by the merge process, because the
410 other context moved the directory it is in.
416 other context moved the directory it is in.
411
417
412 "diverge" is a mapping of source name -> list of destination names
418 "diverge" is a mapping of source name -> list of destination names
413 for divergent renames.
419 for divergent renames.
414
420
415 "renamedelete" is a mapping of source name -> list of destination
421 "renamedelete" is a mapping of source name -> list of destination
416 names for files deleted in c1 that were renamed in c2 or vice-versa.
422 names for files deleted in c1 that were renamed in c2 or vice-versa.
417
423
418 "dirmove" is a mapping of detected source dir -> destination dir renames.
424 "dirmove" is a mapping of detected source dir -> destination dir renames.
419 This is needed for handling changes to new files previously grafted into
425 This is needed for handling changes to new files previously grafted into
420 renamed directories.
426 renamed directories.
421
427
422 This function calls different copytracing algorithms based on config.
428 This function calls different copytracing algorithms based on config.
423 """
429 """
424 # avoid silly behavior for update from empty dir
430 # avoid silly behavior for update from empty dir
425 if not c1 or not c2 or c1 == c2:
431 if not c1 or not c2 or c1 == c2:
426 return {}, {}, {}, {}, {}
432 return {}, {}, {}, {}, {}
427
433
428 narrowmatch = c1.repo().narrowmatch()
434 narrowmatch = c1.repo().narrowmatch()
429
435
430 # avoid silly behavior for parent -> working dir
436 # avoid silly behavior for parent -> working dir
431 if c2.node() is None and c1.node() == repo.dirstate.p1():
437 if c2.node() is None and c1.node() == repo.dirstate.p1():
432 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
438 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
433
439
434 copytracing = repo.ui.config('experimental', 'copytrace')
440 copytracing = repo.ui.config('experimental', 'copytrace')
435 if stringutil.parsebool(copytracing) is False:
441 if stringutil.parsebool(copytracing) is False:
436 # stringutil.parsebool() returns None when it is unable to parse the
442 # stringutil.parsebool() returns None when it is unable to parse the
437 # value, so we should rely on making sure copytracing is on such cases
443 # value, so we should rely on making sure copytracing is on such cases
438 return {}, {}, {}, {}, {}
444 return {}, {}, {}, {}, {}
439
445
440 if usechangesetcentricalgo(repo):
446 if usechangesetcentricalgo(repo):
441 # The heuristics don't make sense when we need changeset-centric algos
447 # The heuristics don't make sense when we need changeset-centric algos
442 return _fullcopytracing(repo, c1, c2, base)
448 return _fullcopytracing(repo, c1, c2, base)
443
449
444 # Copy trace disabling is explicitly below the node == p1 logic above
450 # Copy trace disabling is explicitly below the node == p1 logic above
445 # because the logic above is required for a simple copy to be kept across a
451 # because the logic above is required for a simple copy to be kept across a
446 # rebase.
452 # rebase.
447 if copytracing == 'heuristics':
453 if copytracing == 'heuristics':
448 # Do full copytracing if only non-public revisions are involved as
454 # Do full copytracing if only non-public revisions are involved as
449 # that will be fast enough and will also cover the copies which could
455 # that will be fast enough and will also cover the copies which could
450 # be missed by heuristics
456 # be missed by heuristics
451 if _isfullcopytraceable(repo, c1, base):
457 if _isfullcopytraceable(repo, c1, base):
452 return _fullcopytracing(repo, c1, c2, base)
458 return _fullcopytracing(repo, c1, c2, base)
453 return _heuristicscopytracing(repo, c1, c2, base)
459 return _heuristicscopytracing(repo, c1, c2, base)
454 else:
460 else:
455 return _fullcopytracing(repo, c1, c2, base)
461 return _fullcopytracing(repo, c1, c2, base)
456
462
457 def _isfullcopytraceable(repo, c1, base):
463 def _isfullcopytraceable(repo, c1, base):
458 """ Checks that if base, source and destination are all no-public branches,
464 """ Checks that if base, source and destination are all no-public branches,
459 if yes let's use the full copytrace algorithm for increased capabilities
465 if yes let's use the full copytrace algorithm for increased capabilities
460 since it will be fast enough.
466 since it will be fast enough.
461
467
462 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
468 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
463 number of changesets from c1 to base such that if number of changesets are
469 number of changesets from c1 to base such that if number of changesets are
464 more than the limit, full copytracing algorithm won't be used.
470 more than the limit, full copytracing algorithm won't be used.
465 """
471 """
466 if c1.rev() is None:
472 if c1.rev() is None:
467 c1 = c1.p1()
473 c1 = c1.p1()
468 if c1.mutable() and base.mutable():
474 if c1.mutable() and base.mutable():
469 sourcecommitlimit = repo.ui.configint('experimental',
475 sourcecommitlimit = repo.ui.configint('experimental',
470 'copytrace.sourcecommitlimit')
476 'copytrace.sourcecommitlimit')
471 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
477 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
472 return commits < sourcecommitlimit
478 return commits < sourcecommitlimit
473 return False
479 return False
474
480
475 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
481 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
476 copy, renamedelete):
482 copy, renamedelete):
477 if src not in m2:
483 if src not in m2:
478 # deleted on side 2
484 # deleted on side 2
479 if src not in m1:
485 if src not in m1:
480 # renamed on side 1, deleted on side 2
486 # renamed on side 1, deleted on side 2
481 renamedelete[src] = dsts1
487 renamedelete[src] = dsts1
482 elif m2[src] != mb[src]:
488 elif m2[src] != mb[src]:
483 if not _related(c2[src], base[src]):
489 if not _related(c2[src], base[src]):
484 return
490 return
485 # modified on side 2
491 # modified on side 2
486 for dst in dsts1:
492 for dst in dsts1:
487 if dst not in m2:
493 if dst not in m2:
488 # dst not added on side 2 (handle as regular
494 # dst not added on side 2 (handle as regular
489 # "both created" case in manifestmerge otherwise)
495 # "both created" case in manifestmerge otherwise)
490 copy[dst] = src
496 copy[dst] = src
491
497
492 def _fullcopytracing(repo, c1, c2, base):
498 def _fullcopytracing(repo, c1, c2, base):
493 """ The full copytracing algorithm which finds all the new files that were
499 """ The full copytracing algorithm which finds all the new files that were
494 added from merge base up to the top commit and for each file it checks if
500 added from merge base up to the top commit and for each file it checks if
495 this file was copied from another file.
501 this file was copied from another file.
496
502
497 This is pretty slow when a lot of changesets are involved but will track all
503 This is pretty slow when a lot of changesets are involved but will track all
498 the copies.
504 the copies.
499 """
505 """
500 m1 = c1.manifest()
506 m1 = c1.manifest()
501 m2 = c2.manifest()
507 m2 = c2.manifest()
502 mb = base.manifest()
508 mb = base.manifest()
503
509
504 copies1 = pathcopies(base, c1)
510 copies1 = pathcopies(base, c1)
505 copies2 = pathcopies(base, c2)
511 copies2 = pathcopies(base, c2)
506
512
507 inversecopies1 = {}
513 inversecopies1 = {}
508 inversecopies2 = {}
514 inversecopies2 = {}
509 for dst, src in copies1.items():
515 for dst, src in copies1.items():
510 inversecopies1.setdefault(src, []).append(dst)
516 inversecopies1.setdefault(src, []).append(dst)
511 for dst, src in copies2.items():
517 for dst, src in copies2.items():
512 inversecopies2.setdefault(src, []).append(dst)
518 inversecopies2.setdefault(src, []).append(dst)
513
519
514 copy = {}
520 copy = {}
515 diverge = {}
521 diverge = {}
516 renamedelete = {}
522 renamedelete = {}
517 allsources = set(inversecopies1) | set(inversecopies2)
523 allsources = set(inversecopies1) | set(inversecopies2)
518 for src in allsources:
524 for src in allsources:
519 dsts1 = inversecopies1.get(src)
525 dsts1 = inversecopies1.get(src)
520 dsts2 = inversecopies2.get(src)
526 dsts2 = inversecopies2.get(src)
521 if dsts1 and dsts2:
527 if dsts1 and dsts2:
522 # copied/renamed on both sides
528 # copied/renamed on both sides
523 if src not in m1 and src not in m2:
529 if src not in m1 and src not in m2:
524 # renamed on both sides
530 # renamed on both sides
525 dsts1 = set(dsts1)
531 dsts1 = set(dsts1)
526 dsts2 = set(dsts2)
532 dsts2 = set(dsts2)
527 # If there's some overlap in the rename destinations, we
533 # If there's some overlap in the rename destinations, we
528 # consider it not divergent. For example, if side 1 copies 'a'
534 # consider it not divergent. For example, if side 1 copies 'a'
529 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
535 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
530 # and 'd' and deletes 'a'.
536 # and 'd' and deletes 'a'.
531 if dsts1 & dsts2:
537 if dsts1 & dsts2:
532 for dst in (dsts1 & dsts2):
538 for dst in (dsts1 & dsts2):
533 copy[dst] = src
539 copy[dst] = src
534 else:
540 else:
535 diverge[src] = sorted(dsts1 | dsts2)
541 diverge[src] = sorted(dsts1 | dsts2)
536 elif src in m1 and src in m2:
542 elif src in m1 and src in m2:
537 # copied on both sides
543 # copied on both sides
538 dsts1 = set(dsts1)
544 dsts1 = set(dsts1)
539 dsts2 = set(dsts2)
545 dsts2 = set(dsts2)
540 for dst in (dsts1 & dsts2):
546 for dst in (dsts1 & dsts2):
541 copy[dst] = src
547 copy[dst] = src
542 # TODO: Handle cases where it was renamed on one side and copied
548 # TODO: Handle cases where it was renamed on one side and copied
543 # on the other side
549 # on the other side
544 elif dsts1:
550 elif dsts1:
545 # copied/renamed only on side 1
551 # copied/renamed only on side 1
546 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
552 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
547 copy, renamedelete)
553 copy, renamedelete)
548 elif dsts2:
554 elif dsts2:
549 # copied/renamed only on side 2
555 # copied/renamed only on side 2
550 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
556 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
551 copy, renamedelete)
557 copy, renamedelete)
552
558
553 renamedeleteset = set()
559 renamedeleteset = set()
554 divergeset = set()
560 divergeset = set()
555 for dsts in diverge.values():
561 for dsts in diverge.values():
556 divergeset.update(dsts)
562 divergeset.update(dsts)
557 for dsts in renamedelete.values():
563 for dsts in renamedelete.values():
558 renamedeleteset.update(dsts)
564 renamedeleteset.update(dsts)
559
565
560 # find interesting file sets from manifests
566 # find interesting file sets from manifests
561 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
567 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
562 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
568 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
563 u1 = sorted(addedinm1 - addedinm2)
569 u1 = sorted(addedinm1 - addedinm2)
564 u2 = sorted(addedinm2 - addedinm1)
570 u2 = sorted(addedinm2 - addedinm1)
565
571
566 header = " unmatched files in %s"
572 header = " unmatched files in %s"
567 if u1:
573 if u1:
568 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
574 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
569 if u2:
575 if u2:
570 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
576 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
571
577
572 fullcopy = copies1.copy()
578 fullcopy = copies1.copy()
573 fullcopy.update(copies2)
579 fullcopy.update(copies2)
574 if not fullcopy:
580 if not fullcopy:
575 return copy, {}, diverge, renamedelete, {}
581 return copy, {}, diverge, renamedelete, {}
576
582
577 if repo.ui.debugflag:
583 if repo.ui.debugflag:
578 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
584 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
579 "% = renamed and deleted):\n")
585 "% = renamed and deleted):\n")
580 for f in sorted(fullcopy):
586 for f in sorted(fullcopy):
581 note = ""
587 note = ""
582 if f in copy:
588 if f in copy:
583 note += "*"
589 note += "*"
584 if f in divergeset:
590 if f in divergeset:
585 note += "!"
591 note += "!"
586 if f in renamedeleteset:
592 if f in renamedeleteset:
587 note += "%"
593 note += "%"
588 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
594 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
589 note))
595 note))
590 del divergeset
596 del divergeset
591
597
592 repo.ui.debug(" checking for directory renames\n")
598 repo.ui.debug(" checking for directory renames\n")
593
599
594 # generate a directory move map
600 # generate a directory move map
595 d1, d2 = c1.dirs(), c2.dirs()
601 d1, d2 = c1.dirs(), c2.dirs()
596 invalid = set()
602 invalid = set()
597 dirmove = {}
603 dirmove = {}
598
604
599 # examine each file copy for a potential directory move, which is
605 # examine each file copy for a potential directory move, which is
600 # when all the files in a directory are moved to a new directory
606 # when all the files in a directory are moved to a new directory
601 for dst, src in fullcopy.iteritems():
607 for dst, src in fullcopy.iteritems():
602 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
608 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 if dsrc in invalid:
609 if dsrc in invalid:
604 # already seen to be uninteresting
610 # already seen to be uninteresting
605 continue
611 continue
606 elif dsrc in d1 and ddst in d1:
612 elif dsrc in d1 and ddst in d1:
607 # directory wasn't entirely moved locally
613 # directory wasn't entirely moved locally
608 invalid.add(dsrc)
614 invalid.add(dsrc)
609 elif dsrc in d2 and ddst in d2:
615 elif dsrc in d2 and ddst in d2:
610 # directory wasn't entirely moved remotely
616 # directory wasn't entirely moved remotely
611 invalid.add(dsrc)
617 invalid.add(dsrc)
612 elif dsrc in dirmove and dirmove[dsrc] != ddst:
618 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 # files from the same directory moved to two different places
619 # files from the same directory moved to two different places
614 invalid.add(dsrc)
620 invalid.add(dsrc)
615 else:
621 else:
616 # looks good so far
622 # looks good so far
617 dirmove[dsrc] = ddst
623 dirmove[dsrc] = ddst
618
624
619 for i in invalid:
625 for i in invalid:
620 if i in dirmove:
626 if i in dirmove:
621 del dirmove[i]
627 del dirmove[i]
622 del d1, d2, invalid
628 del d1, d2, invalid
623
629
624 if not dirmove:
630 if not dirmove:
625 return copy, {}, diverge, renamedelete, {}
631 return copy, {}, diverge, renamedelete, {}
626
632
627 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
633 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628
634
629 for d in dirmove:
635 for d in dirmove:
630 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
636 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 (d, dirmove[d]))
637 (d, dirmove[d]))
632
638
633 movewithdir = {}
639 movewithdir = {}
634 # check unaccounted nonoverlapping files against directory moves
640 # check unaccounted nonoverlapping files against directory moves
635 for f in u1 + u2:
641 for f in u1 + u2:
636 if f not in fullcopy:
642 if f not in fullcopy:
637 for d in dirmove:
643 for d in dirmove:
638 if f.startswith(d):
644 if f.startswith(d):
639 # new file added in a directory that was moved, move it
645 # new file added in a directory that was moved, move it
640 df = dirmove[d] + f[len(d):]
646 df = dirmove[d] + f[len(d):]
641 if df not in copy:
647 if df not in copy:
642 movewithdir[f] = df
648 movewithdir[f] = df
643 repo.ui.debug((" pending file src: '%s' -> "
649 repo.ui.debug((" pending file src: '%s' -> "
644 "dst: '%s'\n") % (f, df))
650 "dst: '%s'\n") % (f, df))
645 break
651 break
646
652
647 return copy, movewithdir, diverge, renamedelete, dirmove
653 return copy, movewithdir, diverge, renamedelete, dirmove
648
654
649 def _heuristicscopytracing(repo, c1, c2, base):
655 def _heuristicscopytracing(repo, c1, c2, base):
650 """ Fast copytracing using filename heuristics
656 """ Fast copytracing using filename heuristics
651
657
652 Assumes that moves or renames are of following two types:
658 Assumes that moves or renames are of following two types:
653
659
654 1) Inside a directory only (same directory name but different filenames)
660 1) Inside a directory only (same directory name but different filenames)
655 2) Move from one directory to another
661 2) Move from one directory to another
656 (same filenames but different directory names)
662 (same filenames but different directory names)
657
663
658 Works only when there are no merge commits in the "source branch".
664 Works only when there are no merge commits in the "source branch".
659 Source branch is commits from base up to c2 not including base.
665 Source branch is commits from base up to c2 not including base.
660
666
661 If merge is involved it fallbacks to _fullcopytracing().
667 If merge is involved it fallbacks to _fullcopytracing().
662
668
663 Can be used by setting the following config:
669 Can be used by setting the following config:
664
670
665 [experimental]
671 [experimental]
666 copytrace = heuristics
672 copytrace = heuristics
667
673
668 In some cases the copy/move candidates found by heuristics can be very large
674 In some cases the copy/move candidates found by heuristics can be very large
669 in number and that will make the algorithm slow. The number of possible
675 in number and that will make the algorithm slow. The number of possible
670 candidates to check can be limited by using the config
676 candidates to check can be limited by using the config
671 `experimental.copytrace.movecandidateslimit` which defaults to 100.
677 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 """
678 """
673
679
674 if c1.rev() is None:
680 if c1.rev() is None:
675 c1 = c1.p1()
681 c1 = c1.p1()
676 if c2.rev() is None:
682 if c2.rev() is None:
677 c2 = c2.p1()
683 c2 = c2.p1()
678
684
679 copies = {}
685 copies = {}
680
686
681 changedfiles = set()
687 changedfiles = set()
682 m1 = c1.manifest()
688 m1 = c1.manifest()
683 if not repo.revs('%d::%d', base.rev(), c2.rev()):
689 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 # If base is not in c2 branch, we switch to fullcopytracing
690 # If base is not in c2 branch, we switch to fullcopytracing
685 repo.ui.debug("switching to full copytracing as base is not "
691 repo.ui.debug("switching to full copytracing as base is not "
686 "an ancestor of c2\n")
692 "an ancestor of c2\n")
687 return _fullcopytracing(repo, c1, c2, base)
693 return _fullcopytracing(repo, c1, c2, base)
688
694
689 ctx = c2
695 ctx = c2
690 while ctx != base:
696 while ctx != base:
691 if len(ctx.parents()) == 2:
697 if len(ctx.parents()) == 2:
692 # To keep things simple let's not handle merges
698 # To keep things simple let's not handle merges
693 repo.ui.debug("switching to full copytracing because of merges\n")
699 repo.ui.debug("switching to full copytracing because of merges\n")
694 return _fullcopytracing(repo, c1, c2, base)
700 return _fullcopytracing(repo, c1, c2, base)
695 changedfiles.update(ctx.files())
701 changedfiles.update(ctx.files())
696 ctx = ctx.p1()
702 ctx = ctx.p1()
697
703
698 cp = _forwardcopies(base, c2)
704 cp = _forwardcopies(base, c2)
699 for dst, src in cp.iteritems():
705 for dst, src in cp.iteritems():
700 if src in m1:
706 if src in m1:
701 copies[dst] = src
707 copies[dst] = src
702
708
703 # file is missing if it isn't present in the destination, but is present in
709 # file is missing if it isn't present in the destination, but is present in
704 # the base and present in the source.
710 # the base and present in the source.
705 # Presence in the base is important to exclude added files, presence in the
711 # Presence in the base is important to exclude added files, presence in the
706 # source is important to exclude removed files.
712 # source is important to exclude removed files.
707 filt = lambda f: f not in m1 and f in base and f in c2
713 filt = lambda f: f not in m1 and f in base and f in c2
708 missingfiles = [f for f in changedfiles if filt(f)]
714 missingfiles = [f for f in changedfiles if filt(f)]
709
715
710 if missingfiles:
716 if missingfiles:
711 basenametofilename = collections.defaultdict(list)
717 basenametofilename = collections.defaultdict(list)
712 dirnametofilename = collections.defaultdict(list)
718 dirnametofilename = collections.defaultdict(list)
713
719
714 for f in m1.filesnotin(base.manifest()):
720 for f in m1.filesnotin(base.manifest()):
715 basename = os.path.basename(f)
721 basename = os.path.basename(f)
716 dirname = os.path.dirname(f)
722 dirname = os.path.dirname(f)
717 basenametofilename[basename].append(f)
723 basenametofilename[basename].append(f)
718 dirnametofilename[dirname].append(f)
724 dirnametofilename[dirname].append(f)
719
725
720 for f in missingfiles:
726 for f in missingfiles:
721 basename = os.path.basename(f)
727 basename = os.path.basename(f)
722 dirname = os.path.dirname(f)
728 dirname = os.path.dirname(f)
723 samebasename = basenametofilename[basename]
729 samebasename = basenametofilename[basename]
724 samedirname = dirnametofilename[dirname]
730 samedirname = dirnametofilename[dirname]
725 movecandidates = samebasename + samedirname
731 movecandidates = samebasename + samedirname
726 # f is guaranteed to be present in c2, that's why
732 # f is guaranteed to be present in c2, that's why
727 # c2.filectx(f) won't fail
733 # c2.filectx(f) won't fail
728 f2 = c2.filectx(f)
734 f2 = c2.filectx(f)
729 # we can have a lot of candidates which can slow down the heuristics
735 # we can have a lot of candidates which can slow down the heuristics
730 # config value to limit the number of candidates moves to check
736 # config value to limit the number of candidates moves to check
731 maxcandidates = repo.ui.configint('experimental',
737 maxcandidates = repo.ui.configint('experimental',
732 'copytrace.movecandidateslimit')
738 'copytrace.movecandidateslimit')
733
739
734 if len(movecandidates) > maxcandidates:
740 if len(movecandidates) > maxcandidates:
735 repo.ui.status(_("skipping copytracing for '%s', more "
741 repo.ui.status(_("skipping copytracing for '%s', more "
736 "candidates than the limit: %d\n")
742 "candidates than the limit: %d\n")
737 % (f, len(movecandidates)))
743 % (f, len(movecandidates)))
738 continue
744 continue
739
745
740 for candidate in movecandidates:
746 for candidate in movecandidates:
741 f1 = c1.filectx(candidate)
747 f1 = c1.filectx(candidate)
742 if _related(f1, f2):
748 if _related(f1, f2):
743 # if there are a few related copies then we'll merge
749 # if there are a few related copies then we'll merge
744 # changes into all of them. This matches the behaviour
750 # changes into all of them. This matches the behaviour
745 # of upstream copytracing
751 # of upstream copytracing
746 copies[candidate] = f
752 copies[candidate] = f
747
753
748 return copies, {}, {}, {}, {}
754 return copies, {}, {}, {}, {}
749
755
750 def _related(f1, f2):
756 def _related(f1, f2):
751 """return True if f1 and f2 filectx have a common ancestor
757 """return True if f1 and f2 filectx have a common ancestor
752
758
753 Walk back to common ancestor to see if the two files originate
759 Walk back to common ancestor to see if the two files originate
754 from the same file. Since workingfilectx's rev() is None it messes
760 from the same file. Since workingfilectx's rev() is None it messes
755 up the integer comparison logic, hence the pre-step check for
761 up the integer comparison logic, hence the pre-step check for
756 None (f1 and f2 can only be workingfilectx's initially).
762 None (f1 and f2 can only be workingfilectx's initially).
757 """
763 """
758
764
759 if f1 == f2:
765 if f1 == f2:
760 return True # a match
766 return True # a match
761
767
762 g1, g2 = f1.ancestors(), f2.ancestors()
768 g1, g2 = f1.ancestors(), f2.ancestors()
763 try:
769 try:
764 f1r, f2r = f1.linkrev(), f2.linkrev()
770 f1r, f2r = f1.linkrev(), f2.linkrev()
765
771
766 if f1r is None:
772 if f1r is None:
767 f1 = next(g1)
773 f1 = next(g1)
768 if f2r is None:
774 if f2r is None:
769 f2 = next(g2)
775 f2 = next(g2)
770
776
771 while True:
777 while True:
772 f1r, f2r = f1.linkrev(), f2.linkrev()
778 f1r, f2r = f1.linkrev(), f2.linkrev()
773 if f1r > f2r:
779 if f1r > f2r:
774 f1 = next(g1)
780 f1 = next(g1)
775 elif f2r > f1r:
781 elif f2r > f1r:
776 f2 = next(g2)
782 f2 = next(g2)
777 else: # f1 and f2 point to files in the same linkrev
783 else: # f1 and f2 point to files in the same linkrev
778 return f1 == f2 # true if they point to the same file
784 return f1 == f2 # true if they point to the same file
779 except StopIteration:
785 except StopIteration:
780 return False
786 return False
781
787
782 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
788 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 """reproduce copies from fromrev to rev in the dirstate
789 """reproduce copies from fromrev to rev in the dirstate
784
790
785 If skiprev is specified, it's a revision that should be used to
791 If skiprev is specified, it's a revision that should be used to
786 filter copy records. Any copies that occur between fromrev and
792 filter copy records. Any copies that occur between fromrev and
787 skiprev will not be duplicated, even if they appear in the set of
793 skiprev will not be duplicated, even if they appear in the set of
788 copies between fromrev and rev.
794 copies between fromrev and rev.
789 """
795 """
790 exclude = {}
796 exclude = {}
791 ctraceconfig = repo.ui.config('experimental', 'copytrace')
797 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 bctrace = stringutil.parsebool(ctraceconfig)
798 bctrace = stringutil.parsebool(ctraceconfig)
793 if (skiprev is not None and
799 if (skiprev is not None and
794 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
800 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 # copytrace='off' skips this line, but not the entire function because
801 # copytrace='off' skips this line, but not the entire function because
796 # the line below is O(size of the repo) during a rebase, while the rest
802 # the line below is O(size of the repo) during a rebase, while the rest
797 # of the function is much faster (and is required for carrying copy
803 # of the function is much faster (and is required for carrying copy
798 # metadata across the rebase anyway).
804 # metadata across the rebase anyway).
799 exclude = pathcopies(repo[fromrev], repo[skiprev])
805 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
806 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 if dst in exclude:
807 if dst in exclude:
802 continue
808 continue
803 if dst in wctx:
809 if dst in wctx:
804 wctx[dst].markcopied(src)
810 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now