##// END OF EJS Templates
copies: inline _chainandfilter() to prepare for next patch...
Martin von Zweigbergk -
r42796:11ceb1b8 default
parent child Browse files
Show More
@@ -1,809 +1,805
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
26 def _findlimit(repo, ctxa, ctxb):
26 def _findlimit(repo, ctxa, ctxb):
27 """
27 """
28 Find the last revision that needs to be checked to ensure that a full
28 Find the last revision that needs to be checked to ensure that a full
29 transitive closure for file copies can be properly calculated.
29 transitive closure for file copies can be properly calculated.
30 Generally, this means finding the earliest revision number that's an
30 Generally, this means finding the earliest revision number that's an
31 ancestor of a or b but not both, except when a or b is a direct descendent
31 ancestor of a or b but not both, except when a or b is a direct descendent
32 of the other, in which case we can return the minimum revnum of a and b.
32 of the other, in which case we can return the minimum revnum of a and b.
33 """
33 """
34
34
35 # basic idea:
35 # basic idea:
36 # - mark a and b with different sides
36 # - mark a and b with different sides
37 # - if a parent's children are all on the same side, the parent is
37 # - if a parent's children are all on the same side, the parent is
38 # on that side, otherwise it is on no side
38 # on that side, otherwise it is on no side
39 # - walk the graph in topological order with the help of a heap;
39 # - walk the graph in topological order with the help of a heap;
40 # - add unseen parents to side map
40 # - add unseen parents to side map
41 # - clear side of any parent that has children on different sides
41 # - clear side of any parent that has children on different sides
42 # - track number of interesting revs that might still be on a side
42 # - track number of interesting revs that might still be on a side
43 # - track the lowest interesting rev seen
43 # - track the lowest interesting rev seen
44 # - quit when interesting revs is zero
44 # - quit when interesting revs is zero
45
45
46 cl = repo.changelog
46 cl = repo.changelog
47 wdirparents = None
47 wdirparents = None
48 a = ctxa.rev()
48 a = ctxa.rev()
49 b = ctxb.rev()
49 b = ctxb.rev()
50 if a is None:
50 if a is None:
51 wdirparents = (ctxa.p1(), ctxa.p2())
51 wdirparents = (ctxa.p1(), ctxa.p2())
52 a = node.wdirrev
52 a = node.wdirrev
53 if b is None:
53 if b is None:
54 assert not wdirparents
54 assert not wdirparents
55 wdirparents = (ctxb.p1(), ctxb.p2())
55 wdirparents = (ctxb.p1(), ctxb.p2())
56 b = node.wdirrev
56 b = node.wdirrev
57
57
58 side = {a: -1, b: 1}
58 side = {a: -1, b: 1}
59 visit = [-a, -b]
59 visit = [-a, -b]
60 heapq.heapify(visit)
60 heapq.heapify(visit)
61 interesting = len(visit)
61 interesting = len(visit)
62 limit = node.wdirrev
62 limit = node.wdirrev
63
63
64 while interesting:
64 while interesting:
65 r = -heapq.heappop(visit)
65 r = -heapq.heappop(visit)
66 if r == node.wdirrev:
66 if r == node.wdirrev:
67 parents = [pctx.rev() for pctx in wdirparents]
67 parents = [pctx.rev() for pctx in wdirparents]
68 else:
68 else:
69 parents = cl.parentrevs(r)
69 parents = cl.parentrevs(r)
70 if parents[1] == node.nullrev:
70 if parents[1] == node.nullrev:
71 parents = parents[:1]
71 parents = parents[:1]
72 for p in parents:
72 for p in parents:
73 if p not in side:
73 if p not in side:
74 # first time we see p; add it to visit
74 # first time we see p; add it to visit
75 side[p] = side[r]
75 side[p] = side[r]
76 if side[p]:
76 if side[p]:
77 interesting += 1
77 interesting += 1
78 heapq.heappush(visit, -p)
78 heapq.heappush(visit, -p)
79 elif side[p] and side[p] != side[r]:
79 elif side[p] and side[p] != side[r]:
80 # p was interesting but now we know better
80 # p was interesting but now we know better
81 side[p] = 0
81 side[p] = 0
82 interesting -= 1
82 interesting -= 1
83 if side[r]:
83 if side[r]:
84 limit = r # lowest rev visited
84 limit = r # lowest rev visited
85 interesting -= 1
85 interesting -= 1
86
86
87 # Consider the following flow (see test-commit-amend.t under issue4405):
87 # Consider the following flow (see test-commit-amend.t under issue4405):
88 # 1/ File 'a0' committed
88 # 1/ File 'a0' committed
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
89 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
90 # 3/ Move back to first commit
90 # 3/ Move back to first commit
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
91 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
92 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
93 #
93 #
94 # During the amend in step five, we will be in this state:
94 # During the amend in step five, we will be in this state:
95 #
95 #
96 # @ 3 temporary amend commit for a1-amend
96 # @ 3 temporary amend commit for a1-amend
97 # |
97 # |
98 # o 2 a1-amend
98 # o 2 a1-amend
99 # |
99 # |
100 # | o 1 a1
100 # | o 1 a1
101 # |/
101 # |/
102 # o 0 a0
102 # o 0 a0
103 #
103 #
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
104 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
105 # yet the filelog has the copy information in rev 1 and we will not look
105 # yet the filelog has the copy information in rev 1 and we will not look
106 # back far enough unless we also look at the a and b as candidates.
106 # back far enough unless we also look at the a and b as candidates.
107 # This only occurs when a is a descendent of b or visa-versa.
107 # This only occurs when a is a descendent of b or visa-versa.
108 return min(limit, a, b)
108 return min(limit, a, b)
109
109
110 def _chainandfilter(src, dst, a, b):
110 def _filter(src, dst, t):
111 """chain two sets of copies 'a' and 'b' and filter result"""
111 """filters out invalid copies after chaining"""
112
112
113 # When chaining copies in 'a' (from 'src' via some other commit 'mid') with
113 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
114 # copies in 'b' (from 'mid' to 'dst'), we can get the different cases in the
114 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
115 # following table (not including trivial cases). For example, case 2 is
115 # in the following table (not including trivial cases). For example, case 2
116 # where a file existed in 'src' and remained under that name in 'mid' and
116 # is where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126 #
126 #
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # between 5 and 6, so it includes all cases in its result.
129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
130 # Cases 1, 3, and 5 are then removed by _filter().
131
131
132 t = _chain(a, b)
133 _filter(src, dst, t)
134 return t
135
136 def _filter(src, dst, t):
137 """filters out invalid copies after chaining"""
138 for k, v in list(t.items()):
132 for k, v in list(t.items()):
139 # remove copies from files that didn't exist
133 # remove copies from files that didn't exist
140 if v not in src:
134 if v not in src:
141 del t[k]
135 del t[k]
142 # remove criss-crossed copies
136 # remove criss-crossed copies
143 elif k in src and v in dst:
137 elif k in src and v in dst:
144 del t[k]
138 del t[k]
145 # remove copies to files that were then removed
139 # remove copies to files that were then removed
146 elif k not in dst:
140 elif k not in dst:
147 del t[k]
141 del t[k]
148
142
149 def _chain(a, b):
143 def _chain(a, b):
150 """chain two sets of copies 'a' and 'b'"""
144 """chain two sets of copies 'a' and 'b'"""
151 t = a.copy()
145 t = a.copy()
152 for k, v in b.iteritems():
146 for k, v in b.iteritems():
153 if v in t:
147 if v in t:
154 t[k] = t[v]
148 t[k] = t[v]
155 else:
149 else:
156 t[k] = v
150 t[k] = v
157 return t
151 return t
158
152
159 def _tracefile(fctx, am, limit):
153 def _tracefile(fctx, am, limit):
160 """return file context that is the ancestor of fctx present in ancestor
154 """return file context that is the ancestor of fctx present in ancestor
161 manifest am, stopping after the first ancestor lower than limit"""
155 manifest am, stopping after the first ancestor lower than limit"""
162
156
163 for f in fctx.ancestors():
157 for f in fctx.ancestors():
164 path = f.path()
158 path = f.path()
165 if am.get(path, None) == f.filenode():
159 if am.get(path, None) == f.filenode():
166 return path
160 return path
167 if not f.isintroducedafter(limit):
161 if not f.isintroducedafter(limit):
168 return None
162 return None
169
163
170 def _dirstatecopies(repo, match=None):
164 def _dirstatecopies(repo, match=None):
171 ds = repo.dirstate
165 ds = repo.dirstate
172 c = ds.copies().copy()
166 c = ds.copies().copy()
173 for k in list(c):
167 for k in list(c):
174 if ds[k] not in 'anm' or (match and not match(k)):
168 if ds[k] not in 'anm' or (match and not match(k)):
175 del c[k]
169 del c[k]
176 return c
170 return c
177
171
178 def _computeforwardmissing(a, b, match=None):
172 def _computeforwardmissing(a, b, match=None):
179 """Computes which files are in b but not a.
173 """Computes which files are in b but not a.
180 This is its own function so extensions can easily wrap this call to see what
174 This is its own function so extensions can easily wrap this call to see what
181 files _forwardcopies is about to process.
175 files _forwardcopies is about to process.
182 """
176 """
183 ma = a.manifest()
177 ma = a.manifest()
184 mb = b.manifest()
178 mb = b.manifest()
185 return mb.filesnotin(ma, match=match)
179 return mb.filesnotin(ma, match=match)
186
180
187 def usechangesetcentricalgo(repo):
181 def usechangesetcentricalgo(repo):
188 """Checks if we should use changeset-centric copy algorithms"""
182 """Checks if we should use changeset-centric copy algorithms"""
189 return (repo.ui.config('experimental', 'copies.read-from') in
183 return (repo.ui.config('experimental', 'copies.read-from') in
190 ('changeset-only', 'compatibility'))
184 ('changeset-only', 'compatibility'))
191
185
192 def _committedforwardcopies(a, b, match):
186 def _committedforwardcopies(a, b, match):
193 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
187 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
194 # files might have to be traced back to the fctx parent of the last
188 # files might have to be traced back to the fctx parent of the last
195 # one-side-only changeset, but not further back than that
189 # one-side-only changeset, but not further back than that
196 repo = a._repo
190 repo = a._repo
197
191
198 if usechangesetcentricalgo(repo):
192 if usechangesetcentricalgo(repo):
199 return _changesetforwardcopies(a, b, match)
193 return _changesetforwardcopies(a, b, match)
200
194
201 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
195 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
202 dbg = repo.ui.debug
196 dbg = repo.ui.debug
203 if debug:
197 if debug:
204 dbg('debug.copies: looking into rename from %s to %s\n'
198 dbg('debug.copies: looking into rename from %s to %s\n'
205 % (a, b))
199 % (a, b))
206 limit = _findlimit(repo, a, b)
200 limit = _findlimit(repo, a, b)
207 if debug:
201 if debug:
208 dbg('debug.copies: search limit: %d\n' % limit)
202 dbg('debug.copies: search limit: %d\n' % limit)
209 am = a.manifest()
203 am = a.manifest()
210
204
211 # find where new files came from
205 # find where new files came from
212 # we currently don't try to find where old files went, too expensive
206 # we currently don't try to find where old files went, too expensive
213 # this means we can miss a case like 'hg rm b; hg cp a b'
207 # this means we can miss a case like 'hg rm b; hg cp a b'
214 cm = {}
208 cm = {}
215
209
216 # Computing the forward missing is quite expensive on large manifests, since
210 # Computing the forward missing is quite expensive on large manifests, since
217 # it compares the entire manifests. We can optimize it in the common use
211 # it compares the entire manifests. We can optimize it in the common use
218 # case of computing what copies are in a commit versus its parent (like
212 # case of computing what copies are in a commit versus its parent (like
219 # during a rebase or histedit). Note, we exclude merge commits from this
213 # during a rebase or histedit). Note, we exclude merge commits from this
220 # optimization, since the ctx.files() for a merge commit is not correct for
214 # optimization, since the ctx.files() for a merge commit is not correct for
221 # this comparison.
215 # this comparison.
222 forwardmissingmatch = match
216 forwardmissingmatch = match
223 if b.p1() == a and b.p2().node() == node.nullid:
217 if b.p1() == a and b.p2().node() == node.nullid:
224 filesmatcher = matchmod.exact(b.files())
218 filesmatcher = matchmod.exact(b.files())
225 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
219 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
226 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
220 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
227
221
228 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
222 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
229
223
230 if debug:
224 if debug:
231 dbg('debug.copies: missing files to search: %d\n' % len(missing))
225 dbg('debug.copies: missing files to search: %d\n' % len(missing))
232
226
233 for f in sorted(missing):
227 for f in sorted(missing):
234 if debug:
228 if debug:
235 dbg('debug.copies: tracing file: %s\n' % f)
229 dbg('debug.copies: tracing file: %s\n' % f)
236 fctx = b[f]
230 fctx = b[f]
237 fctx._ancestrycontext = ancestrycontext
231 fctx._ancestrycontext = ancestrycontext
238
232
239 if debug:
233 if debug:
240 start = util.timer()
234 start = util.timer()
241 opath = _tracefile(fctx, am, limit)
235 opath = _tracefile(fctx, am, limit)
242 if opath:
236 if opath:
243 if debug:
237 if debug:
244 dbg('debug.copies: rename of: %s\n' % opath)
238 dbg('debug.copies: rename of: %s\n' % opath)
245 cm[f] = opath
239 cm[f] = opath
246 if debug:
240 if debug:
247 dbg('debug.copies: time: %f seconds\n'
241 dbg('debug.copies: time: %f seconds\n'
248 % (util.timer() - start))
242 % (util.timer() - start))
249 return cm
243 return cm
250
244
251 def _changesetforwardcopies(a, b, match):
245 def _changesetforwardcopies(a, b, match):
252 if a.rev() == node.nullrev:
246 if a.rev() == node.nullrev:
253 return {}
247 return {}
254
248
255 repo = a.repo()
249 repo = a.repo()
256 children = {}
250 children = {}
257 cl = repo.changelog
251 cl = repo.changelog
258 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
252 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
259 for r in missingrevs:
253 for r in missingrevs:
260 for p in cl.parentrevs(r):
254 for p in cl.parentrevs(r):
261 if p == node.nullrev:
255 if p == node.nullrev:
262 continue
256 continue
263 if p not in children:
257 if p not in children:
264 children[p] = [r]
258 children[p] = [r]
265 else:
259 else:
266 children[p].append(r)
260 children[p].append(r)
267
261
268 roots = set(children) - set(missingrevs)
262 roots = set(children) - set(missingrevs)
269 # 'work' contains 3-tuples of a (revision number, parent number, copies).
263 # 'work' contains 3-tuples of a (revision number, parent number, copies).
270 # The parent number is only used for knowing which parent the copies dict
264 # The parent number is only used for knowing which parent the copies dict
271 # came from.
265 # came from.
272 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
266 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
273 # instance for *one* of the child nodes (the last one). Once an instance
267 # instance for *one* of the child nodes (the last one). Once an instance
274 # has been put on the queue, it is thus no longer safe to modify it.
268 # has been put on the queue, it is thus no longer safe to modify it.
275 # Conversely, it *is* safe to modify an instance popped off the queue.
269 # Conversely, it *is* safe to modify an instance popped off the queue.
276 work = [(r, 1, {}) for r in roots]
270 work = [(r, 1, {}) for r in roots]
277 heapq.heapify(work)
271 heapq.heapify(work)
278 alwaysmatch = match.always()
272 alwaysmatch = match.always()
279 while work:
273 while work:
280 r, i1, copies = heapq.heappop(work)
274 r, i1, copies = heapq.heappop(work)
281 if work and work[0][0] == r:
275 if work and work[0][0] == r:
282 # We are tracing copies from both parents
276 # We are tracing copies from both parents
283 r, i2, copies2 = heapq.heappop(work)
277 r, i2, copies2 = heapq.heappop(work)
284 for dst, src in copies2.items():
278 for dst, src in copies2.items():
285 # Unlike when copies are stored in the filelog, we consider
279 # Unlike when copies are stored in the filelog, we consider
286 # it a copy even if the destination already existed on the
280 # it a copy even if the destination already existed on the
287 # other branch. It's simply too expensive to check if the
281 # other branch. It's simply too expensive to check if the
288 # file existed in the manifest.
282 # file existed in the manifest.
289 if dst not in copies:
283 if dst not in copies:
290 # If it was copied on the p1 side, leave it as copied from
284 # If it was copied on the p1 side, leave it as copied from
291 # that side, even if it was also copied on the p2 side.
285 # that side, even if it was also copied on the p2 side.
292 copies[dst] = copies2[dst]
286 copies[dst] = copies2[dst]
293 if r == b.rev():
287 if r == b.rev():
294 _filter(a, b, copies)
288 _filter(a, b, copies)
295 return copies
289 return copies
296 for i, c in enumerate(children[r]):
290 for i, c in enumerate(children[r]):
297 childctx = repo[c]
291 childctx = repo[c]
298 if r == childctx.p1().rev():
292 if r == childctx.p1().rev():
299 parent = 1
293 parent = 1
300 childcopies = childctx.p1copies()
294 childcopies = childctx.p1copies()
301 else:
295 else:
302 assert r == childctx.p2().rev()
296 assert r == childctx.p2().rev()
303 parent = 2
297 parent = 2
304 childcopies = childctx.p2copies()
298 childcopies = childctx.p2copies()
305 if not alwaysmatch:
299 if not alwaysmatch:
306 childcopies = {dst: src for dst, src in childcopies.items()
300 childcopies = {dst: src for dst, src in childcopies.items()
307 if match(dst)}
301 if match(dst)}
308 # Copy the dict only if later iterations will also need it
302 # Copy the dict only if later iterations will also need it
309 if i != len(children[r]) - 1:
303 if i != len(children[r]) - 1:
310 newcopies = copies.copy()
304 newcopies = copies.copy()
311 else:
305 else:
312 newcopies = copies
306 newcopies = copies
313 if childcopies:
307 if childcopies:
314 newcopies = _chain(newcopies, childcopies)
308 newcopies = _chain(newcopies, childcopies)
315 for f in childctx.filesremoved():
309 for f in childctx.filesremoved():
316 if f in newcopies:
310 if f in newcopies:
317 del newcopies[f]
311 del newcopies[f]
318 heapq.heappush(work, (c, parent, newcopies))
312 heapq.heappush(work, (c, parent, newcopies))
319 assert False
313 assert False
320
314
321 def _forwardcopies(a, b, match=None):
315 def _forwardcopies(a, b, match=None):
322 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
316 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
323
317
324 match = a.repo().narrowmatch(match)
318 match = a.repo().narrowmatch(match)
325 # check for working copy
319 # check for working copy
326 if b.rev() is None:
320 if b.rev() is None:
327 cm = _committedforwardcopies(a, b.p1(), match)
321 cm = _committedforwardcopies(a, b.p1(), match)
328 # combine copies from dirstate if necessary
322 # combine copies from dirstate if necessary
329 copies = _chainandfilter(a, b, cm, _dirstatecopies(b._repo, match))
323 copies = _chain(cm, _dirstatecopies(b._repo, match))
324 _filter(a, b, copies)
330 else:
325 else:
331 copies = _committedforwardcopies(a, b, match)
326 copies = _committedforwardcopies(a, b, match)
332 return copies
327 return copies
333
328
334 def _backwardrenames(a, b, match):
329 def _backwardrenames(a, b, match):
335 if a._repo.ui.config('experimental', 'copytrace') == 'off':
330 if a._repo.ui.config('experimental', 'copytrace') == 'off':
336 return {}
331 return {}
337
332
338 # Even though we're not taking copies into account, 1:n rename situations
333 # Even though we're not taking copies into account, 1:n rename situations
339 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
334 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
340 # arbitrarily pick one of the renames.
335 # arbitrarily pick one of the renames.
341 # We don't want to pass in "match" here, since that would filter
336 # We don't want to pass in "match" here, since that would filter
342 # the destination by it. Since we're reversing the copies, we want
337 # the destination by it. Since we're reversing the copies, we want
343 # to filter the source instead.
338 # to filter the source instead.
344 f = _forwardcopies(b, a)
339 f = _forwardcopies(b, a)
345 r = {}
340 r = {}
346 for k, v in sorted(f.iteritems()):
341 for k, v in sorted(f.iteritems()):
347 if match and not match(v):
342 if match and not match(v):
348 continue
343 continue
349 # remove copies
344 # remove copies
350 if v in a:
345 if v in a:
351 continue
346 continue
352 r[v] = k
347 r[v] = k
353 return r
348 return r
354
349
355 def pathcopies(x, y, match=None):
350 def pathcopies(x, y, match=None):
356 """find {dst@y: src@x} copy mapping for directed compare"""
351 """find {dst@y: src@x} copy mapping for directed compare"""
357 repo = x._repo
352 repo = x._repo
358 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
353 debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
359 if debug:
354 if debug:
360 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
355 repo.ui.debug('debug.copies: searching copies from %s to %s\n'
361 % (x, y))
356 % (x, y))
362 if x == y or not x or not y:
357 if x == y or not x or not y:
363 return {}
358 return {}
364 a = y.ancestor(x)
359 a = y.ancestor(x)
365 if a == x:
360 if a == x:
366 if debug:
361 if debug:
367 repo.ui.debug('debug.copies: search mode: forward\n')
362 repo.ui.debug('debug.copies: search mode: forward\n')
368 if y.rev() is None and x == y.p1():
363 if y.rev() is None and x == y.p1():
369 # short-circuit to avoid issues with merge states
364 # short-circuit to avoid issues with merge states
370 return _dirstatecopies(repo, match)
365 return _dirstatecopies(repo, match)
371 copies = _forwardcopies(x, y, match=match)
366 copies = _forwardcopies(x, y, match=match)
372 elif a == y:
367 elif a == y:
373 if debug:
368 if debug:
374 repo.ui.debug('debug.copies: search mode: backward\n')
369 repo.ui.debug('debug.copies: search mode: backward\n')
375 copies = _backwardrenames(x, y, match=match)
370 copies = _backwardrenames(x, y, match=match)
376 else:
371 else:
377 if debug:
372 if debug:
378 repo.ui.debug('debug.copies: search mode: combined\n')
373 repo.ui.debug('debug.copies: search mode: combined\n')
379 copies = _chainandfilter(x, y, _backwardrenames(x, a, match=match),
374 copies = _chain(_backwardrenames(x, a, match=match),
380 _forwardcopies(a, y, match=match))
375 _forwardcopies(a, y, match=match))
376 _filter(x, y, copies)
381 return copies
377 return copies
382
378
383 def mergecopies(repo, c1, c2, base):
379 def mergecopies(repo, c1, c2, base):
384 """
380 """
385 Finds moves and copies between context c1 and c2 that are relevant for
381 Finds moves and copies between context c1 and c2 that are relevant for
386 merging. 'base' will be used as the merge base.
382 merging. 'base' will be used as the merge base.
387
383
388 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
384 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
389 files that were moved/ copied in one merge parent and modified in another.
385 files that were moved/ copied in one merge parent and modified in another.
390 For example:
386 For example:
391
387
392 o ---> 4 another commit
388 o ---> 4 another commit
393 |
389 |
394 | o ---> 3 commit that modifies a.txt
390 | o ---> 3 commit that modifies a.txt
395 | /
391 | /
396 o / ---> 2 commit that moves a.txt to b.txt
392 o / ---> 2 commit that moves a.txt to b.txt
397 |/
393 |/
398 o ---> 1 merge base
394 o ---> 1 merge base
399
395
400 If we try to rebase revision 3 on revision 4, since there is no a.txt in
396 If we try to rebase revision 3 on revision 4, since there is no a.txt in
401 revision 4, and if user have copytrace disabled, we prints the following
397 revision 4, and if user have copytrace disabled, we prints the following
402 message:
398 message:
403
399
404 ```other changed <file> which local deleted```
400 ```other changed <file> which local deleted```
405
401
406 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
402 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
407 "dirmove".
403 "dirmove".
408
404
409 "copy" is a mapping from destination name -> source name,
405 "copy" is a mapping from destination name -> source name,
410 where source is in c1 and destination is in c2 or vice-versa.
406 where source is in c1 and destination is in c2 or vice-versa.
411
407
412 "movewithdir" is a mapping from source name -> destination name,
408 "movewithdir" is a mapping from source name -> destination name,
413 where the file at source present in one context but not the other
409 where the file at source present in one context but not the other
414 needs to be moved to destination by the merge process, because the
410 needs to be moved to destination by the merge process, because the
415 other context moved the directory it is in.
411 other context moved the directory it is in.
416
412
417 "diverge" is a mapping of source name -> list of destination names
413 "diverge" is a mapping of source name -> list of destination names
418 for divergent renames.
414 for divergent renames.
419
415
420 "renamedelete" is a mapping of source name -> list of destination
416 "renamedelete" is a mapping of source name -> list of destination
421 names for files deleted in c1 that were renamed in c2 or vice-versa.
417 names for files deleted in c1 that were renamed in c2 or vice-versa.
422
418
423 "dirmove" is a mapping of detected source dir -> destination dir renames.
419 "dirmove" is a mapping of detected source dir -> destination dir renames.
424 This is needed for handling changes to new files previously grafted into
420 This is needed for handling changes to new files previously grafted into
425 renamed directories.
421 renamed directories.
426
422
427 This function calls different copytracing algorithms based on config.
423 This function calls different copytracing algorithms based on config.
428 """
424 """
429 # avoid silly behavior for update from empty dir
425 # avoid silly behavior for update from empty dir
430 if not c1 or not c2 or c1 == c2:
426 if not c1 or not c2 or c1 == c2:
431 return {}, {}, {}, {}, {}
427 return {}, {}, {}, {}, {}
432
428
433 narrowmatch = c1.repo().narrowmatch()
429 narrowmatch = c1.repo().narrowmatch()
434
430
435 # avoid silly behavior for parent -> working dir
431 # avoid silly behavior for parent -> working dir
436 if c2.node() is None and c1.node() == repo.dirstate.p1():
432 if c2.node() is None and c1.node() == repo.dirstate.p1():
437 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
433 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
438
434
439 copytracing = repo.ui.config('experimental', 'copytrace')
435 copytracing = repo.ui.config('experimental', 'copytrace')
440 if stringutil.parsebool(copytracing) is False:
436 if stringutil.parsebool(copytracing) is False:
441 # stringutil.parsebool() returns None when it is unable to parse the
437 # stringutil.parsebool() returns None when it is unable to parse the
442 # value, so we should rely on making sure copytracing is on such cases
438 # value, so we should rely on making sure copytracing is on such cases
443 return {}, {}, {}, {}, {}
439 return {}, {}, {}, {}, {}
444
440
445 if usechangesetcentricalgo(repo):
441 if usechangesetcentricalgo(repo):
446 # The heuristics don't make sense when we need changeset-centric algos
442 # The heuristics don't make sense when we need changeset-centric algos
447 return _fullcopytracing(repo, c1, c2, base)
443 return _fullcopytracing(repo, c1, c2, base)
448
444
449 # Copy trace disabling is explicitly below the node == p1 logic above
445 # Copy trace disabling is explicitly below the node == p1 logic above
450 # because the logic above is required for a simple copy to be kept across a
446 # because the logic above is required for a simple copy to be kept across a
451 # rebase.
447 # rebase.
452 if copytracing == 'heuristics':
448 if copytracing == 'heuristics':
453 # Do full copytracing if only non-public revisions are involved as
449 # Do full copytracing if only non-public revisions are involved as
454 # that will be fast enough and will also cover the copies which could
450 # that will be fast enough and will also cover the copies which could
455 # be missed by heuristics
451 # be missed by heuristics
456 if _isfullcopytraceable(repo, c1, base):
452 if _isfullcopytraceable(repo, c1, base):
457 return _fullcopytracing(repo, c1, c2, base)
453 return _fullcopytracing(repo, c1, c2, base)
458 return _heuristicscopytracing(repo, c1, c2, base)
454 return _heuristicscopytracing(repo, c1, c2, base)
459 else:
455 else:
460 return _fullcopytracing(repo, c1, c2, base)
456 return _fullcopytracing(repo, c1, c2, base)
461
457
462 def _isfullcopytraceable(repo, c1, base):
458 def _isfullcopytraceable(repo, c1, base):
463 """ Checks that if base, source and destination are all no-public branches,
459 """ Checks that if base, source and destination are all no-public branches,
464 if yes let's use the full copytrace algorithm for increased capabilities
460 if yes let's use the full copytrace algorithm for increased capabilities
465 since it will be fast enough.
461 since it will be fast enough.
466
462
467 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
463 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
468 number of changesets from c1 to base such that if number of changesets are
464 number of changesets from c1 to base such that if number of changesets are
469 more than the limit, full copytracing algorithm won't be used.
465 more than the limit, full copytracing algorithm won't be used.
470 """
466 """
471 if c1.rev() is None:
467 if c1.rev() is None:
472 c1 = c1.p1()
468 c1 = c1.p1()
473 if c1.mutable() and base.mutable():
469 if c1.mutable() and base.mutable():
474 sourcecommitlimit = repo.ui.configint('experimental',
470 sourcecommitlimit = repo.ui.configint('experimental',
475 'copytrace.sourcecommitlimit')
471 'copytrace.sourcecommitlimit')
476 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
472 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
477 return commits < sourcecommitlimit
473 return commits < sourcecommitlimit
478 return False
474 return False
479
475
480 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
476 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
481 copy, renamedelete):
477 copy, renamedelete):
482 if src not in m2:
478 if src not in m2:
483 # deleted on side 2
479 # deleted on side 2
484 if src not in m1:
480 if src not in m1:
485 # renamed on side 1, deleted on side 2
481 # renamed on side 1, deleted on side 2
486 renamedelete[src] = dsts1
482 renamedelete[src] = dsts1
487 elif m2[src] != mb[src]:
483 elif m2[src] != mb[src]:
488 if not _related(c2[src], base[src]):
484 if not _related(c2[src], base[src]):
489 return
485 return
490 # modified on side 2
486 # modified on side 2
491 for dst in dsts1:
487 for dst in dsts1:
492 if dst not in m2:
488 if dst not in m2:
493 # dst not added on side 2 (handle as regular
489 # dst not added on side 2 (handle as regular
494 # "both created" case in manifestmerge otherwise)
490 # "both created" case in manifestmerge otherwise)
495 copy[dst] = src
491 copy[dst] = src
496
492
497 def _fullcopytracing(repo, c1, c2, base):
493 def _fullcopytracing(repo, c1, c2, base):
498 """ The full copytracing algorithm which finds all the new files that were
494 """ The full copytracing algorithm which finds all the new files that were
499 added from merge base up to the top commit and for each file it checks if
495 added from merge base up to the top commit and for each file it checks if
500 this file was copied from another file.
496 this file was copied from another file.
501
497
502 This is pretty slow when a lot of changesets are involved but will track all
498 This is pretty slow when a lot of changesets are involved but will track all
503 the copies.
499 the copies.
504 """
500 """
505 m1 = c1.manifest()
501 m1 = c1.manifest()
506 m2 = c2.manifest()
502 m2 = c2.manifest()
507 mb = base.manifest()
503 mb = base.manifest()
508
504
509 copies1 = pathcopies(base, c1)
505 copies1 = pathcopies(base, c1)
510 copies2 = pathcopies(base, c2)
506 copies2 = pathcopies(base, c2)
511
507
512 inversecopies1 = {}
508 inversecopies1 = {}
513 inversecopies2 = {}
509 inversecopies2 = {}
514 for dst, src in copies1.items():
510 for dst, src in copies1.items():
515 inversecopies1.setdefault(src, []).append(dst)
511 inversecopies1.setdefault(src, []).append(dst)
516 for dst, src in copies2.items():
512 for dst, src in copies2.items():
517 inversecopies2.setdefault(src, []).append(dst)
513 inversecopies2.setdefault(src, []).append(dst)
518
514
519 copy = {}
515 copy = {}
520 diverge = {}
516 diverge = {}
521 renamedelete = {}
517 renamedelete = {}
522 allsources = set(inversecopies1) | set(inversecopies2)
518 allsources = set(inversecopies1) | set(inversecopies2)
523 for src in allsources:
519 for src in allsources:
524 dsts1 = inversecopies1.get(src)
520 dsts1 = inversecopies1.get(src)
525 dsts2 = inversecopies2.get(src)
521 dsts2 = inversecopies2.get(src)
526 if dsts1 and dsts2:
522 if dsts1 and dsts2:
527 # copied/renamed on both sides
523 # copied/renamed on both sides
528 if src not in m1 and src not in m2:
524 if src not in m1 and src not in m2:
529 # renamed on both sides
525 # renamed on both sides
530 dsts1 = set(dsts1)
526 dsts1 = set(dsts1)
531 dsts2 = set(dsts2)
527 dsts2 = set(dsts2)
532 # If there's some overlap in the rename destinations, we
528 # If there's some overlap in the rename destinations, we
533 # consider it not divergent. For example, if side 1 copies 'a'
529 # consider it not divergent. For example, if side 1 copies 'a'
534 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
530 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
535 # and 'd' and deletes 'a'.
531 # and 'd' and deletes 'a'.
536 if dsts1 & dsts2:
532 if dsts1 & dsts2:
537 for dst in (dsts1 & dsts2):
533 for dst in (dsts1 & dsts2):
538 copy[dst] = src
534 copy[dst] = src
539 else:
535 else:
540 diverge[src] = sorted(dsts1 | dsts2)
536 diverge[src] = sorted(dsts1 | dsts2)
541 elif src in m1 and src in m2:
537 elif src in m1 and src in m2:
542 # copied on both sides
538 # copied on both sides
543 dsts1 = set(dsts1)
539 dsts1 = set(dsts1)
544 dsts2 = set(dsts2)
540 dsts2 = set(dsts2)
545 for dst in (dsts1 & dsts2):
541 for dst in (dsts1 & dsts2):
546 copy[dst] = src
542 copy[dst] = src
547 # TODO: Handle cases where it was renamed on one side and copied
543 # TODO: Handle cases where it was renamed on one side and copied
548 # on the other side
544 # on the other side
549 elif dsts1:
545 elif dsts1:
550 # copied/renamed only on side 1
546 # copied/renamed only on side 1
551 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
547 _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
552 copy, renamedelete)
548 copy, renamedelete)
553 elif dsts2:
549 elif dsts2:
554 # copied/renamed only on side 2
550 # copied/renamed only on side 2
555 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
551 _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
556 copy, renamedelete)
552 copy, renamedelete)
557
553
558 renamedeleteset = set()
554 renamedeleteset = set()
559 divergeset = set()
555 divergeset = set()
560 for dsts in diverge.values():
556 for dsts in diverge.values():
561 divergeset.update(dsts)
557 divergeset.update(dsts)
562 for dsts in renamedelete.values():
558 for dsts in renamedelete.values():
563 renamedeleteset.update(dsts)
559 renamedeleteset.update(dsts)
564
560
565 # find interesting file sets from manifests
561 # find interesting file sets from manifests
566 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
562 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
567 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
563 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
568 u1 = sorted(addedinm1 - addedinm2)
564 u1 = sorted(addedinm1 - addedinm2)
569 u2 = sorted(addedinm2 - addedinm1)
565 u2 = sorted(addedinm2 - addedinm1)
570
566
571 header = " unmatched files in %s"
567 header = " unmatched files in %s"
572 if u1:
568 if u1:
573 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
569 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
574 if u2:
570 if u2:
575 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
571 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
576
572
577 fullcopy = copies1.copy()
573 fullcopy = copies1.copy()
578 fullcopy.update(copies2)
574 fullcopy.update(copies2)
579 if not fullcopy:
575 if not fullcopy:
580 return copy, {}, diverge, renamedelete, {}
576 return copy, {}, diverge, renamedelete, {}
581
577
582 if repo.ui.debugflag:
578 if repo.ui.debugflag:
583 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
579 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
584 "% = renamed and deleted):\n")
580 "% = renamed and deleted):\n")
585 for f in sorted(fullcopy):
581 for f in sorted(fullcopy):
586 note = ""
582 note = ""
587 if f in copy:
583 if f in copy:
588 note += "*"
584 note += "*"
589 if f in divergeset:
585 if f in divergeset:
590 note += "!"
586 note += "!"
591 if f in renamedeleteset:
587 if f in renamedeleteset:
592 note += "%"
588 note += "%"
593 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
589 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
594 note))
590 note))
595 del divergeset
591 del divergeset
596
592
597 repo.ui.debug(" checking for directory renames\n")
593 repo.ui.debug(" checking for directory renames\n")
598
594
599 # generate a directory move map
595 # generate a directory move map
600 d1, d2 = c1.dirs(), c2.dirs()
596 d1, d2 = c1.dirs(), c2.dirs()
601 invalid = set()
597 invalid = set()
602 dirmove = {}
598 dirmove = {}
603
599
604 # examine each file copy for a potential directory move, which is
600 # examine each file copy for a potential directory move, which is
605 # when all the files in a directory are moved to a new directory
601 # when all the files in a directory are moved to a new directory
606 for dst, src in fullcopy.iteritems():
602 for dst, src in fullcopy.iteritems():
607 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
603 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
608 if dsrc in invalid:
604 if dsrc in invalid:
609 # already seen to be uninteresting
605 # already seen to be uninteresting
610 continue
606 continue
611 elif dsrc in d1 and ddst in d1:
607 elif dsrc in d1 and ddst in d1:
612 # directory wasn't entirely moved locally
608 # directory wasn't entirely moved locally
613 invalid.add(dsrc)
609 invalid.add(dsrc)
614 elif dsrc in d2 and ddst in d2:
610 elif dsrc in d2 and ddst in d2:
615 # directory wasn't entirely moved remotely
611 # directory wasn't entirely moved remotely
616 invalid.add(dsrc)
612 invalid.add(dsrc)
617 elif dsrc in dirmove and dirmove[dsrc] != ddst:
613 elif dsrc in dirmove and dirmove[dsrc] != ddst:
618 # files from the same directory moved to two different places
614 # files from the same directory moved to two different places
619 invalid.add(dsrc)
615 invalid.add(dsrc)
620 else:
616 else:
621 # looks good so far
617 # looks good so far
622 dirmove[dsrc] = ddst
618 dirmove[dsrc] = ddst
623
619
624 for i in invalid:
620 for i in invalid:
625 if i in dirmove:
621 if i in dirmove:
626 del dirmove[i]
622 del dirmove[i]
627 del d1, d2, invalid
623 del d1, d2, invalid
628
624
629 if not dirmove:
625 if not dirmove:
630 return copy, {}, diverge, renamedelete, {}
626 return copy, {}, diverge, renamedelete, {}
631
627
632 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
628 dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}
633
629
634 for d in dirmove:
630 for d in dirmove:
635 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
631 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
636 (d, dirmove[d]))
632 (d, dirmove[d]))
637
633
638 movewithdir = {}
634 movewithdir = {}
639 # check unaccounted nonoverlapping files against directory moves
635 # check unaccounted nonoverlapping files against directory moves
640 for f in u1 + u2:
636 for f in u1 + u2:
641 if f not in fullcopy:
637 if f not in fullcopy:
642 for d in dirmove:
638 for d in dirmove:
643 if f.startswith(d):
639 if f.startswith(d):
644 # new file added in a directory that was moved, move it
640 # new file added in a directory that was moved, move it
645 df = dirmove[d] + f[len(d):]
641 df = dirmove[d] + f[len(d):]
646 if df not in copy:
642 if df not in copy:
647 movewithdir[f] = df
643 movewithdir[f] = df
648 repo.ui.debug((" pending file src: '%s' -> "
644 repo.ui.debug((" pending file src: '%s' -> "
649 "dst: '%s'\n") % (f, df))
645 "dst: '%s'\n") % (f, df))
650 break
646 break
651
647
652 return copy, movewithdir, diverge, renamedelete, dirmove
648 return copy, movewithdir, diverge, renamedelete, dirmove
653
649
654 def _heuristicscopytracing(repo, c1, c2, base):
650 def _heuristicscopytracing(repo, c1, c2, base):
655 """ Fast copytracing using filename heuristics
651 """ Fast copytracing using filename heuristics
656
652
657 Assumes that moves or renames are of following two types:
653 Assumes that moves or renames are of following two types:
658
654
659 1) Inside a directory only (same directory name but different filenames)
655 1) Inside a directory only (same directory name but different filenames)
660 2) Move from one directory to another
656 2) Move from one directory to another
661 (same filenames but different directory names)
657 (same filenames but different directory names)
662
658
663 Works only when there are no merge commits in the "source branch".
659 Works only when there are no merge commits in the "source branch".
664 Source branch is commits from base up to c2 not including base.
660 Source branch is commits from base up to c2 not including base.
665
661
666 If merge is involved it fallbacks to _fullcopytracing().
662 If merge is involved it fallbacks to _fullcopytracing().
667
663
668 Can be used by setting the following config:
664 Can be used by setting the following config:
669
665
670 [experimental]
666 [experimental]
671 copytrace = heuristics
667 copytrace = heuristics
672
668
673 In some cases the copy/move candidates found by heuristics can be very large
669 In some cases the copy/move candidates found by heuristics can be very large
674 in number and that will make the algorithm slow. The number of possible
670 in number and that will make the algorithm slow. The number of possible
675 candidates to check can be limited by using the config
671 candidates to check can be limited by using the config
676 `experimental.copytrace.movecandidateslimit` which defaults to 100.
672 `experimental.copytrace.movecandidateslimit` which defaults to 100.
677 """
673 """
678
674
679 if c1.rev() is None:
675 if c1.rev() is None:
680 c1 = c1.p1()
676 c1 = c1.p1()
681 if c2.rev() is None:
677 if c2.rev() is None:
682 c2 = c2.p1()
678 c2 = c2.p1()
683
679
684 copies = {}
680 copies = {}
685
681
686 changedfiles = set()
682 changedfiles = set()
687 m1 = c1.manifest()
683 m1 = c1.manifest()
688 if not repo.revs('%d::%d', base.rev(), c2.rev()):
684 if not repo.revs('%d::%d', base.rev(), c2.rev()):
689 # If base is not in c2 branch, we switch to fullcopytracing
685 # If base is not in c2 branch, we switch to fullcopytracing
690 repo.ui.debug("switching to full copytracing as base is not "
686 repo.ui.debug("switching to full copytracing as base is not "
691 "an ancestor of c2\n")
687 "an ancestor of c2\n")
692 return _fullcopytracing(repo, c1, c2, base)
688 return _fullcopytracing(repo, c1, c2, base)
693
689
694 ctx = c2
690 ctx = c2
695 while ctx != base:
691 while ctx != base:
696 if len(ctx.parents()) == 2:
692 if len(ctx.parents()) == 2:
697 # To keep things simple let's not handle merges
693 # To keep things simple let's not handle merges
698 repo.ui.debug("switching to full copytracing because of merges\n")
694 repo.ui.debug("switching to full copytracing because of merges\n")
699 return _fullcopytracing(repo, c1, c2, base)
695 return _fullcopytracing(repo, c1, c2, base)
700 changedfiles.update(ctx.files())
696 changedfiles.update(ctx.files())
701 ctx = ctx.p1()
697 ctx = ctx.p1()
702
698
703 cp = _forwardcopies(base, c2)
699 cp = _forwardcopies(base, c2)
704 for dst, src in cp.iteritems():
700 for dst, src in cp.iteritems():
705 if src in m1:
701 if src in m1:
706 copies[dst] = src
702 copies[dst] = src
707
703
708 # file is missing if it isn't present in the destination, but is present in
704 # file is missing if it isn't present in the destination, but is present in
709 # the base and present in the source.
705 # the base and present in the source.
710 # Presence in the base is important to exclude added files, presence in the
706 # Presence in the base is important to exclude added files, presence in the
711 # source is important to exclude removed files.
707 # source is important to exclude removed files.
712 filt = lambda f: f not in m1 and f in base and f in c2
708 filt = lambda f: f not in m1 and f in base and f in c2
713 missingfiles = [f for f in changedfiles if filt(f)]
709 missingfiles = [f for f in changedfiles if filt(f)]
714
710
715 if missingfiles:
711 if missingfiles:
716 basenametofilename = collections.defaultdict(list)
712 basenametofilename = collections.defaultdict(list)
717 dirnametofilename = collections.defaultdict(list)
713 dirnametofilename = collections.defaultdict(list)
718
714
719 for f in m1.filesnotin(base.manifest()):
715 for f in m1.filesnotin(base.manifest()):
720 basename = os.path.basename(f)
716 basename = os.path.basename(f)
721 dirname = os.path.dirname(f)
717 dirname = os.path.dirname(f)
722 basenametofilename[basename].append(f)
718 basenametofilename[basename].append(f)
723 dirnametofilename[dirname].append(f)
719 dirnametofilename[dirname].append(f)
724
720
725 for f in missingfiles:
721 for f in missingfiles:
726 basename = os.path.basename(f)
722 basename = os.path.basename(f)
727 dirname = os.path.dirname(f)
723 dirname = os.path.dirname(f)
728 samebasename = basenametofilename[basename]
724 samebasename = basenametofilename[basename]
729 samedirname = dirnametofilename[dirname]
725 samedirname = dirnametofilename[dirname]
730 movecandidates = samebasename + samedirname
726 movecandidates = samebasename + samedirname
731 # f is guaranteed to be present in c2, that's why
727 # f is guaranteed to be present in c2, that's why
732 # c2.filectx(f) won't fail
728 # c2.filectx(f) won't fail
733 f2 = c2.filectx(f)
729 f2 = c2.filectx(f)
734 # we can have a lot of candidates which can slow down the heuristics
730 # we can have a lot of candidates which can slow down the heuristics
735 # config value to limit the number of candidates moves to check
731 # config value to limit the number of candidates moves to check
736 maxcandidates = repo.ui.configint('experimental',
732 maxcandidates = repo.ui.configint('experimental',
737 'copytrace.movecandidateslimit')
733 'copytrace.movecandidateslimit')
738
734
739 if len(movecandidates) > maxcandidates:
735 if len(movecandidates) > maxcandidates:
740 repo.ui.status(_("skipping copytracing for '%s', more "
736 repo.ui.status(_("skipping copytracing for '%s', more "
741 "candidates than the limit: %d\n")
737 "candidates than the limit: %d\n")
742 % (f, len(movecandidates)))
738 % (f, len(movecandidates)))
743 continue
739 continue
744
740
745 for candidate in movecandidates:
741 for candidate in movecandidates:
746 f1 = c1.filectx(candidate)
742 f1 = c1.filectx(candidate)
747 if _related(f1, f2):
743 if _related(f1, f2):
748 # if there are a few related copies then we'll merge
744 # if there are a few related copies then we'll merge
749 # changes into all of them. This matches the behaviour
745 # changes into all of them. This matches the behaviour
750 # of upstream copytracing
746 # of upstream copytracing
751 copies[candidate] = f
747 copies[candidate] = f
752
748
753 return copies, {}, {}, {}, {}
749 return copies, {}, {}, {}, {}
754
750
755 def _related(f1, f2):
751 def _related(f1, f2):
756 """return True if f1 and f2 filectx have a common ancestor
752 """return True if f1 and f2 filectx have a common ancestor
757
753
758 Walk back to common ancestor to see if the two files originate
754 Walk back to common ancestor to see if the two files originate
759 from the same file. Since workingfilectx's rev() is None it messes
755 from the same file. Since workingfilectx's rev() is None it messes
760 up the integer comparison logic, hence the pre-step check for
756 up the integer comparison logic, hence the pre-step check for
761 None (f1 and f2 can only be workingfilectx's initially).
757 None (f1 and f2 can only be workingfilectx's initially).
762 """
758 """
763
759
764 if f1 == f2:
760 if f1 == f2:
765 return True # a match
761 return True # a match
766
762
767 g1, g2 = f1.ancestors(), f2.ancestors()
763 g1, g2 = f1.ancestors(), f2.ancestors()
768 try:
764 try:
769 f1r, f2r = f1.linkrev(), f2.linkrev()
765 f1r, f2r = f1.linkrev(), f2.linkrev()
770
766
771 if f1r is None:
767 if f1r is None:
772 f1 = next(g1)
768 f1 = next(g1)
773 if f2r is None:
769 if f2r is None:
774 f2 = next(g2)
770 f2 = next(g2)
775
771
776 while True:
772 while True:
777 f1r, f2r = f1.linkrev(), f2.linkrev()
773 f1r, f2r = f1.linkrev(), f2.linkrev()
778 if f1r > f2r:
774 if f1r > f2r:
779 f1 = next(g1)
775 f1 = next(g1)
780 elif f2r > f1r:
776 elif f2r > f1r:
781 f2 = next(g2)
777 f2 = next(g2)
782 else: # f1 and f2 point to files in the same linkrev
778 else: # f1 and f2 point to files in the same linkrev
783 return f1 == f2 # true if they point to the same file
779 return f1 == f2 # true if they point to the same file
784 except StopIteration:
780 except StopIteration:
785 return False
781 return False
786
782
787 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
783 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
788 """reproduce copies from fromrev to rev in the dirstate
784 """reproduce copies from fromrev to rev in the dirstate
789
785
790 If skiprev is specified, it's a revision that should be used to
786 If skiprev is specified, it's a revision that should be used to
791 filter copy records. Any copies that occur between fromrev and
787 filter copy records. Any copies that occur between fromrev and
792 skiprev will not be duplicated, even if they appear in the set of
788 skiprev will not be duplicated, even if they appear in the set of
793 copies between fromrev and rev.
789 copies between fromrev and rev.
794 """
790 """
795 exclude = {}
791 exclude = {}
796 ctraceconfig = repo.ui.config('experimental', 'copytrace')
792 ctraceconfig = repo.ui.config('experimental', 'copytrace')
797 bctrace = stringutil.parsebool(ctraceconfig)
793 bctrace = stringutil.parsebool(ctraceconfig)
798 if (skiprev is not None and
794 if (skiprev is not None and
799 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
795 (ctraceconfig == 'heuristics' or bctrace or bctrace is None)):
800 # copytrace='off' skips this line, but not the entire function because
796 # copytrace='off' skips this line, but not the entire function because
801 # the line below is O(size of the repo) during a rebase, while the rest
797 # the line below is O(size of the repo) during a rebase, while the rest
802 # of the function is much faster (and is required for carrying copy
798 # of the function is much faster (and is required for carrying copy
803 # metadata across the rebase anyway).
799 # metadata across the rebase anyway).
804 exclude = pathcopies(repo[fromrev], repo[skiprev])
800 exclude = pathcopies(repo[fromrev], repo[skiprev])
805 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
801 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
806 if dst in exclude:
802 if dst in exclude:
807 continue
803 continue
808 if dst in wctx:
804 if dst in wctx:
809 wctx[dst].markcopied(src)
805 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now