##// END OF EJS Templates
copies: expand the logic of usechangesetcentricalgo...
marmoute -
r43290:f3bcae1e default
parent child Browse files
Show More
@@ -1,836 +1,837 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 util,
20 util,
21 )
21 )
22 from .utils import (
22 from .utils import (
23 stringutil,
23 stringutil,
24 )
24 )
25
25
def _findlimit(repo, ctxa, ctxb):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.

    ``ctxa``/``ctxb`` are changectx-like objects; either may be the working
    directory context (rev() is None), which is mapped to node.wdirrev below.
    Returns an integer revision number.
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    #   on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    #   - add unseen parents to side map
    #   - clear side of any parent that has children on different sides
    #   - track number of interesting revs that might still be on a side
    #   - track the lowest interesting rev seen
    #   - quit when interesting revs is zero

    cl = repo.changelog
    wdirparents = None
    a = ctxa.rev()
    b = ctxb.rev()
    if a is None:
        # working directory: remember its parents so we can expand them later
        wdirparents = (ctxa.p1(), ctxa.p2())
        a = node.wdirrev
    if b is None:
        # at most one of a/b may be the working directory
        assert not wdirparents
        wdirparents = (ctxb.p1(), ctxb.p2())
        b = node.wdirrev

    # side: -1 means "ancestor of a only", 1 means "ancestor of b only",
    # 0 means "ancestor of both" (no longer interesting)
    side = {a: -1, b: 1}
    # revisions are pushed negated so the max-rev pops first (topological order)
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    limit = node.wdirrev

    while interesting:
        r = -heapq.heappop(visit)
        if r == node.wdirrev:
            parents = [pctx.rev() for pctx in wdirparents]
        else:
            parents = cl.parentrevs(r)
            if parents[1] == node.nullrev:
                # non-merge: only the first parent matters
                parents = parents[:1]
        for p in parents:
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
109
109
110 def _filter(src, dst, t):
110 def _filter(src, dst, t):
111 """filters out invalid copies after chaining"""
111 """filters out invalid copies after chaining"""
112
112
113 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
113 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
114 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
114 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
115 # in the following table (not including trivial cases). For example, case 2
115 # in the following table (not including trivial cases). For example, case 2
116 # is where a file existed in 'src' and remained under that name in 'mid' and
116 # is where a file existed in 'src' and remained under that name in 'mid' and
117 # then was renamed between 'mid' and 'dst'.
117 # then was renamed between 'mid' and 'dst'.
118 #
118 #
119 # case src mid dst result
119 # case src mid dst result
120 # 1 x y - -
120 # 1 x y - -
121 # 2 x y y x->y
121 # 2 x y y x->y
122 # 3 x y x -
122 # 3 x y x -
123 # 4 x y z x->z
123 # 4 x y z x->z
124 # 5 - x y -
124 # 5 - x y -
125 # 6 x x y x->y
125 # 6 x x y x->y
126 #
126 #
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
127 # _chain() takes care of chaining the copies in 'a' and 'b', but it
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
128 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
129 # between 5 and 6, so it includes all cases in its result.
129 # between 5 and 6, so it includes all cases in its result.
130 # Cases 1, 3, and 5 are then removed by _filter().
130 # Cases 1, 3, and 5 are then removed by _filter().
131
131
132 for k, v in list(t.items()):
132 for k, v in list(t.items()):
133 # remove copies from files that didn't exist
133 # remove copies from files that didn't exist
134 if v not in src:
134 if v not in src:
135 del t[k]
135 del t[k]
136 # remove criss-crossed copies
136 # remove criss-crossed copies
137 elif k in src and v in dst:
137 elif k in src and v in dst:
138 del t[k]
138 del t[k]
139 # remove copies to files that were then removed
139 # remove copies to files that were then removed
140 elif k not in dst:
140 elif k not in dst:
141 del t[k]
141 del t[k]
142
142
143 def _chain(a, b):
143 def _chain(a, b):
144 """chain two sets of copies 'a' and 'b'"""
144 """chain two sets of copies 'a' and 'b'"""
145 t = a.copy()
145 t = a.copy()
146 for k, v in b.iteritems():
146 for k, v in b.iteritems():
147 if v in t:
147 if v in t:
148 t[k] = t[v]
148 t[k] = t[v]
149 else:
149 else:
150 t[k] = v
150 t[k] = v
151 return t
151 return t
152
152
153 def _tracefile(fctx, am, basemf, limit):
153 def _tracefile(fctx, am, basemf, limit):
154 """return file context that is the ancestor of fctx present in ancestor
154 """return file context that is the ancestor of fctx present in ancestor
155 manifest am, stopping after the first ancestor lower than limit"""
155 manifest am, stopping after the first ancestor lower than limit"""
156
156
157 for f in fctx.ancestors():
157 for f in fctx.ancestors():
158 path = f.path()
158 path = f.path()
159 if am.get(path, None) == f.filenode():
159 if am.get(path, None) == f.filenode():
160 return path
160 return path
161 if basemf and basemf.get(path, None) == f.filenode():
161 if basemf and basemf.get(path, None) == f.filenode():
162 return path
162 return path
163 if not f.isintroducedafter(limit):
163 if not f.isintroducedafter(limit):
164 return None
164 return None
165
165
166 def _dirstatecopies(repo, match=None):
166 def _dirstatecopies(repo, match=None):
167 ds = repo.dirstate
167 ds = repo.dirstate
168 c = ds.copies().copy()
168 c = ds.copies().copy()
169 for k in list(c):
169 for k in list(c):
170 if ds[k] not in 'anm' or (match and not match(k)):
170 if ds[k] not in 'anm' or (match and not match(k)):
171 del c[k]
171 del c[k]
172 return c
172 return c
173
173
174 def _computeforwardmissing(a, b, match=None):
174 def _computeforwardmissing(a, b, match=None):
175 """Computes which files are in b but not a.
175 """Computes which files are in b but not a.
176 This is its own function so extensions can easily wrap this call to see what
176 This is its own function so extensions can easily wrap this call to see what
177 files _forwardcopies is about to process.
177 files _forwardcopies is about to process.
178 """
178 """
179 ma = a.manifest()
179 ma = a.manifest()
180 mb = b.manifest()
180 mb = b.manifest()
181 return mb.filesnotin(ma, match=match)
181 return mb.filesnotin(ma, match=match)
182
182
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    readfrom = repo.ui.config('experimental', 'copies.read-from')
    # both modes record/read copy metadata in the changeset itself
    return readfrom in ('changeset-only', 'compatibility')
187
188
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # delegate entirely when copy metadata lives in the changesets
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg('debug.copies: looking into rename from %s to %s\n'
            % (a, b))
    limit = _findlimit(repo, a, b)
    if debug:
        dbg('debug.copies: search limit: %d\n' % limit)
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context so each filectx below reuses the same ancestor
    # computation instead of redoing it per file
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg('debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg('debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf, limit)
        if opath:
            if debug:
                dbg('debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg('debug.copies: time: %f seconds\n'
                % (util.timer() - start))
    return cm
247
248
def _changesetforwardcopies(a, b, match):
    """Like _forwardcopies(), but using copy metadata stored in changesets.

    Walks the revisions between a and b in topological order, chaining the
    per-changeset p1/p2 copy records, and returns a {dst: src} dict.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo()
    # map of parent rev -> list of child revs, restricted to the revisions
    # between a (exclusive) and b (inclusive)
    children = {}
    cl = repo.changelog
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)

    roots = set(children) - set(missingrevs)
    # 'work' contains 3-tuples of a (revision number, parent number, copies).
    # The parent number is only used for knowing which parent the copies dict
    # came from.
    # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
    # instance for *one* of the child nodes (the last one). Once an instance
    # has been put on the queue, it is thus no longer safe to modify it.
    # Conversely, it *is* safe to modify an instance popped off the queue.
    work = [(r, 1, {}) for r in roots]
    heapq.heapify(work)
    alwaysmatch = match.always()
    while work:
        r, i1, copies = heapq.heappop(work)
        if work and work[0][0] == r:
            # We are tracing copies from both parents
            r, i2, copies2 = heapq.heappop(work)
            for dst, src in copies2.items():
                # Unlike when copies are stored in the filelog, we consider
                # it a copy even if the destination already existed on the
                # other branch. It's simply too expensive to check if the
                # file existed in the manifest.
                if dst not in copies:
                    # If it was copied on the p1 side, leave it as copied from
                    # that side, even if it was also copied on the p2 side.
                    copies[dst] = copies2[dst]
        if r == b.rev():
            return copies
        for i, c in enumerate(children[r]):
            childctx = repo[c]
            if r == childctx.p1().rev():
                parent = 1
                childcopies = childctx.p1copies()
            else:
                assert r == childctx.p2().rev()
                parent = 2
                childcopies = childctx.p2copies()
            if not alwaysmatch:
                childcopies = {dst: src for dst, src in childcopies.items()
                               if match(dst)}
            # Copy the dict only if later iterations will also need it
            if i != len(children[r]) - 1:
                newcopies = copies.copy()
            else:
                newcopies = copies
            if childcopies:
                newcopies = _chain(newcopies, childcopies)
            # files removed in the child cannot be copy destinations anymore
            for f in childctx.filesremoved():
                if f in newcopies:
                    del newcopies[f]
            heapq.heappush(work, (c, parent, newcopies))
    # b.rev() is reachable from every root, so the loop must return above
    assert False
316
317
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is None:
        # working copy: trace committed copies up to b's first parent, then
        # layer the uncommitted dirstate copies on top
        committed = _committedforwardcopies(a, b.p1(), base, match)
        return _chain(committed, _dirstatecopies(b._repo, match))
    return _committedforwardcopies(a, b, base, match)
331
332
def _backwardrenames(a, b, match):
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    backward = {}
    for dst, src in sorted(forward.iteritems()):
        if match and not match(src):
            continue
        # drop plain copies: the source still exists in 'a'
        if src in a:
            continue
        backward[src] = dst
    return backward
352
353
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool('devel', 'debug.copies')
    if debug:
        repo.ui.debug('debug.copies: searching copies from %s to %s\n'
                      % (x, y))
    if x == y or not x or not y:
        # trivial compare: nothing can have been copied
        return {}
    anc = y.ancestor(x)
    if anc == x:
        if debug:
            repo.ui.debug('debug.copies: search mode: forward\n')
        if y.rev() is None and x == y.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(repo, match)
        copies = _forwardcopies(x, y, match=match)
    elif anc == y:
        if debug:
            repo.ui.debug('debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug('debug.copies: search mode: combined\n')
        base = None
        if anc.rev() != node.nullrev:
            base = x
        copies = _chain(_backwardrenames(x, anc, match=match),
                        _forwardcopies(anc, y, base, match=match))
    _filter(x, y, copies)
    return copies
384
385
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return {}, {}, {}, {}, {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing != 'heuristics':
        return _fullcopytracing(repo, c1, c2, base)

    # Do full copytracing if only non-public revisions are involved as
    # that will be fast enough and will also cover the copies which could
    # be missed by heuristics
    if _isfullcopytraceable(repo, c1, base):
        return _fullcopytracing(repo, c1, c2, base)
    return _heuristicscopytracing(repo, c1, c2, base)
463
464
464 def _isfullcopytraceable(repo, c1, base):
465 def _isfullcopytraceable(repo, c1, base):
465 """ Checks that if base, source and destination are all no-public branches,
466 """ Checks that if base, source and destination are all no-public branches,
466 if yes let's use the full copytrace algorithm for increased capabilities
467 if yes let's use the full copytrace algorithm for increased capabilities
467 since it will be fast enough.
468 since it will be fast enough.
468
469
469 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 number of changesets from c1 to base such that if number of changesets are
471 number of changesets from c1 to base such that if number of changesets are
471 more than the limit, full copytracing algorithm won't be used.
472 more than the limit, full copytracing algorithm won't be used.
472 """
473 """
473 if c1.rev() is None:
474 if c1.rev() is None:
474 c1 = c1.p1()
475 c1 = c1.p1()
475 if c1.mutable() and base.mutable():
476 if c1.mutable() and base.mutable():
476 sourcecommitlimit = repo.ui.configint('experimental',
477 sourcecommitlimit = repo.ui.configint('experimental',
477 'copytrace.sourcecommitlimit')
478 'copytrace.sourcecommitlimit')
478 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
479 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
479 return commits < sourcecommitlimit
480 return commits < sourcecommitlimit
480 return False
481 return False
481
482
482 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
483 def _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
483 copy, renamedelete):
484 copy, renamedelete):
484 if src not in m2:
485 if src not in m2:
485 # deleted on side 2
486 # deleted on side 2
486 if src not in m1:
487 if src not in m1:
487 # renamed on side 1, deleted on side 2
488 # renamed on side 1, deleted on side 2
488 renamedelete[src] = dsts1
489 renamedelete[src] = dsts1
489 elif m2[src] != mb[src]:
490 elif m2[src] != mb[src]:
490 if not _related(c2[src], base[src]):
491 if not _related(c2[src], base[src]):
491 return
492 return
492 # modified on side 2
493 # modified on side 2
493 for dst in dsts1:
494 for dst in dsts1:
494 if dst not in m2:
495 if dst not in m2:
495 # dst not added on side 2 (handle as regular
496 # dst not added on side 2 (handle as regular
496 # "both created" case in manifestmerge otherwise)
497 # "both created" case in manifestmerge otherwise)
497 copy[dst] = src
498 copy[dst] = src
498
499
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:

    - ``copy``: ``{dst: src}`` copies to merge
    - ``movewithdir``: ``{file: newfile}`` files to move along a directory
      rename
    - ``diverge``: ``{src: [dsts]}`` renames of one source to several
      destinations across the two sides
    - ``renamedelete``: ``{src: [dsts]}`` renamed on one side, deleted on the
      other
    - ``dirmove``: ``{srcdir/: dstdir/}`` detected whole-directory moves
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # copies from base to each side, as {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dsts]} so both sides can be compared per source file
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in (dsts1 & dsts2):
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in (dsts1 & dsts2):
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(src, dsts1, m1, m2, mb, c2, base,
                                   copy, renamedelete)
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(src, dsts2, m2, m1, mb, c1, base,
                                   copy, renamedelete)

    # flattened destination sets, used only for the debug annotations below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    # union of both sides' copies; side 2 wins on conflicting destinations
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug("  all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug("   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                              note))
    del divergeset

    repo.ui.debug("  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    # drop candidates that were later contradicted
    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash keys so prefix matching below is exact
    dirmove = {k + "/": v + "/" for k, v in dirmove.iteritems()}

    for d in dirmove:
        repo.ui.debug("  discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(("   pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
655
656
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 5-tuple ``(copies, {}, {}, {}, {})`` whose shape matches
    _fullcopytracing()'s return value.
    """

    # working-directory contexts have no revision; use their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    # collect all files touched between base and c2 (exclusive of base),
    # bailing out to the full algorithm if a merge commit is encountered
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # start from the explicitly recorded copies whose source still exists
    # on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in cp.iteritems():
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index the files added on the c1 side by basename and by dirname,
        # matching the two rename shapes described in the docstring
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)
        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(movecandidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(movecandidates)))
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
756
757
757 def _related(f1, f2):
758 def _related(f1, f2):
758 """return True if f1 and f2 filectx have a common ancestor
759 """return True if f1 and f2 filectx have a common ancestor
759
760
760 Walk back to common ancestor to see if the two files originate
761 Walk back to common ancestor to see if the two files originate
761 from the same file. Since workingfilectx's rev() is None it messes
762 from the same file. Since workingfilectx's rev() is None it messes
762 up the integer comparison logic, hence the pre-step check for
763 up the integer comparison logic, hence the pre-step check for
763 None (f1 and f2 can only be workingfilectx's initially).
764 None (f1 and f2 can only be workingfilectx's initially).
764 """
765 """
765
766
766 if f1 == f2:
767 if f1 == f2:
767 return True # a match
768 return True # a match
768
769
769 g1, g2 = f1.ancestors(), f2.ancestors()
770 g1, g2 = f1.ancestors(), f2.ancestors()
770 try:
771 try:
771 f1r, f2r = f1.linkrev(), f2.linkrev()
772 f1r, f2r = f1.linkrev(), f2.linkrev()
772
773
773 if f1r is None:
774 if f1r is None:
774 f1 = next(g1)
775 f1 = next(g1)
775 if f2r is None:
776 if f2r is None:
776 f2 = next(g2)
777 f2 = next(g2)
777
778
778 while True:
779 while True:
779 f1r, f2r = f1.linkrev(), f2.linkrev()
780 f1r, f2r = f1.linkrev(), f2.linkrev()
780 if f1r > f2r:
781 if f1r > f2r:
781 f1 = next(g1)
782 f1 = next(g1)
782 elif f2r > f1r:
783 elif f2r > f1r:
783 f2 = next(g2)
784 f2 = next(g2)
784 else: # f1 and f2 point to files in the same linkrev
785 else: # f1 and f2 point to files in the same linkrev
785 return f1 == f2 # true if they point to the same file
786 return f1 == f2 # true if they point to the same file
786 except StopIteration:
787 except StopIteration:
787 return False
788 return False
788
789
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    exclude = {}
    ctraceconfig = repo.ui.config('experimental', 'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # copytracing is considered enabled for 'heuristics', any truthy
    # value, or an unparsable value (parsebool returns None for those)
    tracingenabled = (ctraceconfig == 'heuristics'
                      or bctrace or bctrace is None)
    if skiprev is not None and tracingenabled:
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
        if dst in exclude or dst not in wctx:
            continue
        wctx[dst].markcopied(src)
812
813
def computechangesetcopies(ctx):
    """return the copies data for a changeset

    The copies data are returned as a pair of dictionnary (p1copies, p2copies).

    Each dictionnary are in the form: `{newname: oldname}`
    """
    p1copies = {}
    p2copies = {}
    parents = ((ctx.p1(), p1copies), (ctx.p2(), p2copies))
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # ignore files outside the narrowspec or absent from this revision
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent holds the matching source
        # filenode (first parent wins on a tie)
        for parent, target in parents:
            if src in parent and parent[src].filenode() == srcnode:
                target[dst] = src
                break
    return p1copies, p2copies
General Comments 0
You need to be logged in to leave comments. Login now