##// END OF EJS Templates
pathcopies: give up any optimization based on `introrev`...
marmoute -
r43469:c16fe77e default
parent child Browse files
Show More
@@ -1,1009 +1,1011 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16
16
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
17 from .revlogutils.flagutil import REVIDX_SIDEDATA
18
18
19 from . import (
19 from . import (
20 error,
20 error,
21 match as matchmod,
21 match as matchmod,
22 node,
22 node,
23 pathutil,
23 pathutil,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 )
26 )
27
27
28 from .revlogutils import sidedata as sidedatamod
28 from .revlogutils import sidedata as sidedatamod
29
29
30 from .utils import stringutil
30 from .utils import stringutil
31
31
32
32
33 def _findlimit(repo, ctxa, ctxb):
33 def _findlimit(repo, ctxa, ctxb):
34 """
34 """
35 Find the last revision that needs to be checked to ensure that a full
35 Find the last revision that needs to be checked to ensure that a full
36 transitive closure for file copies can be properly calculated.
36 transitive closure for file copies can be properly calculated.
37 Generally, this means finding the earliest revision number that's an
37 Generally, this means finding the earliest revision number that's an
38 ancestor of a or b but not both, except when a or b is a direct descendent
38 ancestor of a or b but not both, except when a or b is a direct descendent
39 of the other, in which case we can return the minimum revnum of a and b.
39 of the other, in which case we can return the minimum revnum of a and b.
40 """
40 """
41
41
42 # basic idea:
42 # basic idea:
43 # - mark a and b with different sides
43 # - mark a and b with different sides
44 # - if a parent's children are all on the same side, the parent is
44 # - if a parent's children are all on the same side, the parent is
45 # on that side, otherwise it is on no side
45 # on that side, otherwise it is on no side
46 # - walk the graph in topological order with the help of a heap;
46 # - walk the graph in topological order with the help of a heap;
47 # - add unseen parents to side map
47 # - add unseen parents to side map
48 # - clear side of any parent that has children on different sides
48 # - clear side of any parent that has children on different sides
49 # - track number of interesting revs that might still be on a side
49 # - track number of interesting revs that might still be on a side
50 # - track the lowest interesting rev seen
50 # - track the lowest interesting rev seen
51 # - quit when interesting revs is zero
51 # - quit when interesting revs is zero
52
52
53 cl = repo.changelog
53 cl = repo.changelog
54 wdirparents = None
54 wdirparents = None
55 a = ctxa.rev()
55 a = ctxa.rev()
56 b = ctxb.rev()
56 b = ctxb.rev()
57 if a is None:
57 if a is None:
58 wdirparents = (ctxa.p1(), ctxa.p2())
58 wdirparents = (ctxa.p1(), ctxa.p2())
59 a = node.wdirrev
59 a = node.wdirrev
60 if b is None:
60 if b is None:
61 assert not wdirparents
61 assert not wdirparents
62 wdirparents = (ctxb.p1(), ctxb.p2())
62 wdirparents = (ctxb.p1(), ctxb.p2())
63 b = node.wdirrev
63 b = node.wdirrev
64
64
65 side = {a: -1, b: 1}
65 side = {a: -1, b: 1}
66 visit = [-a, -b]
66 visit = [-a, -b]
67 heapq.heapify(visit)
67 heapq.heapify(visit)
68 interesting = len(visit)
68 interesting = len(visit)
69 limit = node.wdirrev
69 limit = node.wdirrev
70
70
71 while interesting:
71 while interesting:
72 r = -(heapq.heappop(visit))
72 r = -(heapq.heappop(visit))
73 if r == node.wdirrev:
73 if r == node.wdirrev:
74 parents = [pctx.rev() for pctx in wdirparents]
74 parents = [pctx.rev() for pctx in wdirparents]
75 else:
75 else:
76 parents = cl.parentrevs(r)
76 parents = cl.parentrevs(r)
77 if parents[1] == node.nullrev:
77 if parents[1] == node.nullrev:
78 parents = parents[:1]
78 parents = parents[:1]
79 for p in parents:
79 for p in parents:
80 if p not in side:
80 if p not in side:
81 # first time we see p; add it to visit
81 # first time we see p; add it to visit
82 side[p] = side[r]
82 side[p] = side[r]
83 if side[p]:
83 if side[p]:
84 interesting += 1
84 interesting += 1
85 heapq.heappush(visit, -p)
85 heapq.heappush(visit, -p)
86 elif side[p] and side[p] != side[r]:
86 elif side[p] and side[p] != side[r]:
87 # p was interesting but now we know better
87 # p was interesting but now we know better
88 side[p] = 0
88 side[p] = 0
89 interesting -= 1
89 interesting -= 1
90 if side[r]:
90 if side[r]:
91 limit = r # lowest rev visited
91 limit = r # lowest rev visited
92 interesting -= 1
92 interesting -= 1
93
93
94 # Consider the following flow (see test-commit-amend.t under issue4405):
94 # Consider the following flow (see test-commit-amend.t under issue4405):
95 # 1/ File 'a0' committed
95 # 1/ File 'a0' committed
96 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
96 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
97 # 3/ Move back to first commit
97 # 3/ Move back to first commit
98 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
98 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
99 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
99 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
100 #
100 #
101 # During the amend in step five, we will be in this state:
101 # During the amend in step five, we will be in this state:
102 #
102 #
103 # @ 3 temporary amend commit for a1-amend
103 # @ 3 temporary amend commit for a1-amend
104 # |
104 # |
105 # o 2 a1-amend
105 # o 2 a1-amend
106 # |
106 # |
107 # | o 1 a1
107 # | o 1 a1
108 # |/
108 # |/
109 # o 0 a0
109 # o 0 a0
110 #
110 #
111 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
111 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
112 # yet the filelog has the copy information in rev 1 and we will not look
112 # yet the filelog has the copy information in rev 1 and we will not look
113 # back far enough unless we also look at the a and b as candidates.
113 # back far enough unless we also look at the a and b as candidates.
114 # This only occurs when a is a descendent of b or visa-versa.
114 # This only occurs when a is a descendent of b or visa-versa.
115 return min(limit, a, b)
115 return min(limit, a, b)
116
116
117
117
118 def _filter(src, dst, t):
118 def _filter(src, dst, t):
119 """filters out invalid copies after chaining"""
119 """filters out invalid copies after chaining"""
120
120
121 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
121 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
122 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
122 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
123 # in the following table (not including trivial cases). For example, case 2
123 # in the following table (not including trivial cases). For example, case 2
124 # is where a file existed in 'src' and remained under that name in 'mid' and
124 # is where a file existed in 'src' and remained under that name in 'mid' and
125 # then was renamed between 'mid' and 'dst'.
125 # then was renamed between 'mid' and 'dst'.
126 #
126 #
127 # case src mid dst result
127 # case src mid dst result
128 # 1 x y - -
128 # 1 x y - -
129 # 2 x y y x->y
129 # 2 x y y x->y
130 # 3 x y x -
130 # 3 x y x -
131 # 4 x y z x->z
131 # 4 x y z x->z
132 # 5 - x y -
132 # 5 - x y -
133 # 6 x x y x->y
133 # 6 x x y x->y
134 #
134 #
135 # _chain() takes care of chaining the copies in 'a' and 'b', but it
135 # _chain() takes care of chaining the copies in 'a' and 'b', but it
136 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
136 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
137 # between 5 and 6, so it includes all cases in its result.
137 # between 5 and 6, so it includes all cases in its result.
138 # Cases 1, 3, and 5 are then removed by _filter().
138 # Cases 1, 3, and 5 are then removed by _filter().
139
139
140 for k, v in list(t.items()):
140 for k, v in list(t.items()):
141 # remove copies from files that didn't exist
141 # remove copies from files that didn't exist
142 if v not in src:
142 if v not in src:
143 del t[k]
143 del t[k]
144 # remove criss-crossed copies
144 # remove criss-crossed copies
145 elif k in src and v in dst:
145 elif k in src and v in dst:
146 del t[k]
146 del t[k]
147 # remove copies to files that were then removed
147 # remove copies to files that were then removed
148 elif k not in dst:
148 elif k not in dst:
149 del t[k]
149 del t[k]
150
150
151
151
152 def _chain(a, b):
152 def _chain(a, b):
153 """chain two sets of copies 'a' and 'b'"""
153 """chain two sets of copies 'a' and 'b'"""
154 t = a.copy()
154 t = a.copy()
155 for k, v in pycompat.iteritems(b):
155 for k, v in pycompat.iteritems(b):
156 if v in t:
156 if v in t:
157 t[k] = t[v]
157 t[k] = t[v]
158 else:
158 else:
159 t[k] = v
159 t[k] = v
160 return t
160 return t
161
161
162
162
163 def _tracefile(fctx, am, basemf, limit):
163 def _tracefile(fctx, am, basemf, limit):
164 """return file context that is the ancestor of fctx present in ancestor
164 """return file context that is the ancestor of fctx present in ancestor
165 manifest am, stopping after the first ancestor lower than limit"""
165 manifest am
166
167 Note: we used to try and stop after a given limit, however checking if that
168 limit is reached turned out to be very expensive. we are better off
169 disabling that feature."""
166
170
167 for f in fctx.ancestors():
171 for f in fctx.ancestors():
168 path = f.path()
172 path = f.path()
169 if am.get(path, None) == f.filenode():
173 if am.get(path, None) == f.filenode():
170 return path
174 return path
171 if basemf and basemf.get(path, None) == f.filenode():
175 if basemf and basemf.get(path, None) == f.filenode():
172 return path
176 return path
173 if not f.isintroducedafter(limit):
174 return None
175
177
176
178
177 def _dirstatecopies(repo, match=None):
179 def _dirstatecopies(repo, match=None):
178 ds = repo.dirstate
180 ds = repo.dirstate
179 c = ds.copies().copy()
181 c = ds.copies().copy()
180 for k in list(c):
182 for k in list(c):
181 if ds[k] not in b'anm' or (match and not match(k)):
183 if ds[k] not in b'anm' or (match and not match(k)):
182 del c[k]
184 del c[k]
183 return c
185 return c
184
186
185
187
186 def _computeforwardmissing(a, b, match=None):
188 def _computeforwardmissing(a, b, match=None):
187 """Computes which files are in b but not a.
189 """Computes which files are in b but not a.
188 This is its own function so extensions can easily wrap this call to see what
190 This is its own function so extensions can easily wrap this call to see what
189 files _forwardcopies is about to process.
191 files _forwardcopies is about to process.
190 """
192 """
191 ma = a.manifest()
193 ma = a.manifest()
192 mb = b.manifest()
194 mb = b.manifest()
193 return mb.filesnotin(ma, match=match)
195 return mb.filesnotin(ma, match=match)
194
196
195
197
196 def usechangesetcentricalgo(repo):
198 def usechangesetcentricalgo(repo):
197 """Checks if we should use changeset-centric copy algorithms"""
199 """Checks if we should use changeset-centric copy algorithms"""
198 if repo.filecopiesmode == b'changeset-sidedata':
200 if repo.filecopiesmode == b'changeset-sidedata':
199 return True
201 return True
200 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
202 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
201 changesetsource = (b'changeset-only', b'compatibility')
203 changesetsource = (b'changeset-only', b'compatibility')
202 return readfrom in changesetsource
204 return readfrom in changesetsource
203
205
204
206
205 def _committedforwardcopies(a, b, base, match):
207 def _committedforwardcopies(a, b, base, match):
206 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
208 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
207 # files might have to be traced back to the fctx parent of the last
209 # files might have to be traced back to the fctx parent of the last
208 # one-side-only changeset, but not further back than that
210 # one-side-only changeset, but not further back than that
209 repo = a._repo
211 repo = a._repo
210
212
211 if usechangesetcentricalgo(repo):
213 if usechangesetcentricalgo(repo):
212 return _changesetforwardcopies(a, b, match)
214 return _changesetforwardcopies(a, b, match)
213
215
214 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
216 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
215 dbg = repo.ui.debug
217 dbg = repo.ui.debug
216 if debug:
218 if debug:
217 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
219 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
218 limit = _findlimit(repo, a, b)
220 limit = _findlimit(repo, a, b)
219 if debug:
221 if debug:
220 dbg(b'debug.copies: search limit: %d\n' % limit)
222 dbg(b'debug.copies: search limit: %d\n' % limit)
221 am = a.manifest()
223 am = a.manifest()
222 basemf = None if base is None else base.manifest()
224 basemf = None if base is None else base.manifest()
223
225
224 # find where new files came from
226 # find where new files came from
225 # we currently don't try to find where old files went, too expensive
227 # we currently don't try to find where old files went, too expensive
226 # this means we can miss a case like 'hg rm b; hg cp a b'
228 # this means we can miss a case like 'hg rm b; hg cp a b'
227 cm = {}
229 cm = {}
228
230
229 # Computing the forward missing is quite expensive on large manifests, since
231 # Computing the forward missing is quite expensive on large manifests, since
230 # it compares the entire manifests. We can optimize it in the common use
232 # it compares the entire manifests. We can optimize it in the common use
231 # case of computing what copies are in a commit versus its parent (like
233 # case of computing what copies are in a commit versus its parent (like
232 # during a rebase or histedit). Note, we exclude merge commits from this
234 # during a rebase or histedit). Note, we exclude merge commits from this
233 # optimization, since the ctx.files() for a merge commit is not correct for
235 # optimization, since the ctx.files() for a merge commit is not correct for
234 # this comparison.
236 # this comparison.
235 forwardmissingmatch = match
237 forwardmissingmatch = match
236 if b.p1() == a and b.p2().node() == node.nullid:
238 if b.p1() == a and b.p2().node() == node.nullid:
237 filesmatcher = matchmod.exact(b.files())
239 filesmatcher = matchmod.exact(b.files())
238 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
240 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
239 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
241 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
240
242
241 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
243 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
242
244
243 if debug:
245 if debug:
244 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
246 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
245
247
246 for f in sorted(missing):
248 for f in sorted(missing):
247 if debug:
249 if debug:
248 dbg(b'debug.copies: tracing file: %s\n' % f)
250 dbg(b'debug.copies: tracing file: %s\n' % f)
249 fctx = b[f]
251 fctx = b[f]
250 fctx._ancestrycontext = ancestrycontext
252 fctx._ancestrycontext = ancestrycontext
251
253
252 if debug:
254 if debug:
253 start = util.timer()
255 start = util.timer()
254 opath = _tracefile(fctx, am, basemf, limit)
256 opath = _tracefile(fctx, am, basemf, limit)
255 if opath:
257 if opath:
256 if debug:
258 if debug:
257 dbg(b'debug.copies: rename of: %s\n' % opath)
259 dbg(b'debug.copies: rename of: %s\n' % opath)
258 cm[f] = opath
260 cm[f] = opath
259 if debug:
261 if debug:
260 dbg(
262 dbg(
261 b'debug.copies: time: %f seconds\n'
263 b'debug.copies: time: %f seconds\n'
262 % (util.timer() - start)
264 % (util.timer() - start)
263 )
265 )
264 return cm
266 return cm
265
267
266
268
267 def _changesetforwardcopies(a, b, match):
269 def _changesetforwardcopies(a, b, match):
268 if a.rev() in (node.nullrev, b.rev()):
270 if a.rev() in (node.nullrev, b.rev()):
269 return {}
271 return {}
270
272
271 repo = a.repo()
273 repo = a.repo()
272 children = {}
274 children = {}
273 cl = repo.changelog
275 cl = repo.changelog
274 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
276 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
275 for r in missingrevs:
277 for r in missingrevs:
276 for p in cl.parentrevs(r):
278 for p in cl.parentrevs(r):
277 if p == node.nullrev:
279 if p == node.nullrev:
278 continue
280 continue
279 if p not in children:
281 if p not in children:
280 children[p] = [r]
282 children[p] = [r]
281 else:
283 else:
282 children[p].append(r)
284 children[p].append(r)
283
285
284 roots = set(children) - set(missingrevs)
286 roots = set(children) - set(missingrevs)
285 # 'work' contains 3-tuples of a (revision number, parent number, copies).
287 # 'work' contains 3-tuples of a (revision number, parent number, copies).
286 # The parent number is only used for knowing which parent the copies dict
288 # The parent number is only used for knowing which parent the copies dict
287 # came from.
289 # came from.
288 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
290 # NOTE: To reduce costly copying the 'copies' dicts, we reuse the same
289 # instance for *one* of the child nodes (the last one). Once an instance
291 # instance for *one* of the child nodes (the last one). Once an instance
290 # has been put on the queue, it is thus no longer safe to modify it.
292 # has been put on the queue, it is thus no longer safe to modify it.
291 # Conversely, it *is* safe to modify an instance popped off the queue.
293 # Conversely, it *is* safe to modify an instance popped off the queue.
292 work = [(r, 1, {}) for r in roots]
294 work = [(r, 1, {}) for r in roots]
293 heapq.heapify(work)
295 heapq.heapify(work)
294 alwaysmatch = match.always()
296 alwaysmatch = match.always()
295 while work:
297 while work:
296 r, i1, copies = heapq.heappop(work)
298 r, i1, copies = heapq.heappop(work)
297 if work and work[0][0] == r:
299 if work and work[0][0] == r:
298 # We are tracing copies from both parents
300 # We are tracing copies from both parents
299 r, i2, copies2 = heapq.heappop(work)
301 r, i2, copies2 = heapq.heappop(work)
300 for dst, src in copies2.items():
302 for dst, src in copies2.items():
301 # Unlike when copies are stored in the filelog, we consider
303 # Unlike when copies are stored in the filelog, we consider
302 # it a copy even if the destination already existed on the
304 # it a copy even if the destination already existed on the
303 # other branch. It's simply too expensive to check if the
305 # other branch. It's simply too expensive to check if the
304 # file existed in the manifest.
306 # file existed in the manifest.
305 if dst not in copies:
307 if dst not in copies:
306 # If it was copied on the p1 side, leave it as copied from
308 # If it was copied on the p1 side, leave it as copied from
307 # that side, even if it was also copied on the p2 side.
309 # that side, even if it was also copied on the p2 side.
308 copies[dst] = copies2[dst]
310 copies[dst] = copies2[dst]
309 if r == b.rev():
311 if r == b.rev():
310 return copies
312 return copies
311 for i, c in enumerate(children[r]):
313 for i, c in enumerate(children[r]):
312 childctx = repo[c]
314 childctx = repo[c]
313 if r == childctx.p1().rev():
315 if r == childctx.p1().rev():
314 parent = 1
316 parent = 1
315 childcopies = childctx.p1copies()
317 childcopies = childctx.p1copies()
316 else:
318 else:
317 assert r == childctx.p2().rev()
319 assert r == childctx.p2().rev()
318 parent = 2
320 parent = 2
319 childcopies = childctx.p2copies()
321 childcopies = childctx.p2copies()
320 if not alwaysmatch:
322 if not alwaysmatch:
321 childcopies = {
323 childcopies = {
322 dst: src for dst, src in childcopies.items() if match(dst)
324 dst: src for dst, src in childcopies.items() if match(dst)
323 }
325 }
324 # Copy the dict only if later iterations will also need it
326 # Copy the dict only if later iterations will also need it
325 if i != len(children[r]) - 1:
327 if i != len(children[r]) - 1:
326 newcopies = copies.copy()
328 newcopies = copies.copy()
327 else:
329 else:
328 newcopies = copies
330 newcopies = copies
329 if childcopies:
331 if childcopies:
330 newcopies = _chain(newcopies, childcopies)
332 newcopies = _chain(newcopies, childcopies)
331 for f in childctx.filesremoved():
333 for f in childctx.filesremoved():
332 if f in newcopies:
334 if f in newcopies:
333 del newcopies[f]
335 del newcopies[f]
334 heapq.heappush(work, (c, parent, newcopies))
336 heapq.heappush(work, (c, parent, newcopies))
335 assert False
337 assert False
336
338
337
339
338 def _forwardcopies(a, b, base=None, match=None):
340 def _forwardcopies(a, b, base=None, match=None):
339 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
341 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
340
342
341 if base is None:
343 if base is None:
342 base = a
344 base = a
343 match = a.repo().narrowmatch(match)
345 match = a.repo().narrowmatch(match)
344 # check for working copy
346 # check for working copy
345 if b.rev() is None:
347 if b.rev() is None:
346 cm = _committedforwardcopies(a, b.p1(), base, match)
348 cm = _committedforwardcopies(a, b.p1(), base, match)
347 # combine copies from dirstate if necessary
349 # combine copies from dirstate if necessary
348 copies = _chain(cm, _dirstatecopies(b._repo, match))
350 copies = _chain(cm, _dirstatecopies(b._repo, match))
349 else:
351 else:
350 copies = _committedforwardcopies(a, b, base, match)
352 copies = _committedforwardcopies(a, b, base, match)
351 return copies
353 return copies
352
354
353
355
354 def _backwardrenames(a, b, match):
356 def _backwardrenames(a, b, match):
355 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
357 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
356 return {}
358 return {}
357
359
358 # Even though we're not taking copies into account, 1:n rename situations
360 # Even though we're not taking copies into account, 1:n rename situations
359 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
361 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
360 # arbitrarily pick one of the renames.
362 # arbitrarily pick one of the renames.
361 # We don't want to pass in "match" here, since that would filter
363 # We don't want to pass in "match" here, since that would filter
362 # the destination by it. Since we're reversing the copies, we want
364 # the destination by it. Since we're reversing the copies, we want
363 # to filter the source instead.
365 # to filter the source instead.
364 f = _forwardcopies(b, a)
366 f = _forwardcopies(b, a)
365 r = {}
367 r = {}
366 for k, v in sorted(pycompat.iteritems(f)):
368 for k, v in sorted(pycompat.iteritems(f)):
367 if match and not match(v):
369 if match and not match(v):
368 continue
370 continue
369 # remove copies
371 # remove copies
370 if v in a:
372 if v in a:
371 continue
373 continue
372 r[v] = k
374 r[v] = k
373 return r
375 return r
374
376
375
377
376 def pathcopies(x, y, match=None):
378 def pathcopies(x, y, match=None):
377 """find {dst@y: src@x} copy mapping for directed compare"""
379 """find {dst@y: src@x} copy mapping for directed compare"""
378 repo = x._repo
380 repo = x._repo
379 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
381 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
380 if debug:
382 if debug:
381 repo.ui.debug(
383 repo.ui.debug(
382 b'debug.copies: searching copies from %s to %s\n' % (x, y)
384 b'debug.copies: searching copies from %s to %s\n' % (x, y)
383 )
385 )
384 if x == y or not x or not y:
386 if x == y or not x or not y:
385 return {}
387 return {}
386 a = y.ancestor(x)
388 a = y.ancestor(x)
387 if a == x:
389 if a == x:
388 if debug:
390 if debug:
389 repo.ui.debug(b'debug.copies: search mode: forward\n')
391 repo.ui.debug(b'debug.copies: search mode: forward\n')
390 if y.rev() is None and x == y.p1():
392 if y.rev() is None and x == y.p1():
391 # short-circuit to avoid issues with merge states
393 # short-circuit to avoid issues with merge states
392 return _dirstatecopies(repo, match)
394 return _dirstatecopies(repo, match)
393 copies = _forwardcopies(x, y, match=match)
395 copies = _forwardcopies(x, y, match=match)
394 elif a == y:
396 elif a == y:
395 if debug:
397 if debug:
396 repo.ui.debug(b'debug.copies: search mode: backward\n')
398 repo.ui.debug(b'debug.copies: search mode: backward\n')
397 copies = _backwardrenames(x, y, match=match)
399 copies = _backwardrenames(x, y, match=match)
398 else:
400 else:
399 if debug:
401 if debug:
400 repo.ui.debug(b'debug.copies: search mode: combined\n')
402 repo.ui.debug(b'debug.copies: search mode: combined\n')
401 base = None
403 base = None
402 if a.rev() != node.nullrev:
404 if a.rev() != node.nullrev:
403 base = x
405 base = x
404 copies = _chain(
406 copies = _chain(
405 _backwardrenames(x, a, match=match),
407 _backwardrenames(x, a, match=match),
406 _forwardcopies(a, y, base, match=match),
408 _forwardcopies(a, y, base, match=match),
407 )
409 )
408 _filter(x, y, copies)
410 _filter(x, y, copies)
409 return copies
411 return copies
410
412
411
413
412 def mergecopies(repo, c1, c2, base):
414 def mergecopies(repo, c1, c2, base):
413 """
415 """
414 Finds moves and copies between context c1 and c2 that are relevant for
416 Finds moves and copies between context c1 and c2 that are relevant for
415 merging. 'base' will be used as the merge base.
417 merging. 'base' will be used as the merge base.
416
418
417 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
419 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
418 files that were moved/ copied in one merge parent and modified in another.
420 files that were moved/ copied in one merge parent and modified in another.
419 For example:
421 For example:
420
422
421 o ---> 4 another commit
423 o ---> 4 another commit
422 |
424 |
423 | o ---> 3 commit that modifies a.txt
425 | o ---> 3 commit that modifies a.txt
424 | /
426 | /
425 o / ---> 2 commit that moves a.txt to b.txt
427 o / ---> 2 commit that moves a.txt to b.txt
426 |/
428 |/
427 o ---> 1 merge base
429 o ---> 1 merge base
428
430
429 If we try to rebase revision 3 on revision 4, since there is no a.txt in
431 If we try to rebase revision 3 on revision 4, since there is no a.txt in
430 revision 4, and if user have copytrace disabled, we prints the following
432 revision 4, and if user have copytrace disabled, we prints the following
431 message:
433 message:
432
434
433 ```other changed <file> which local deleted```
435 ```other changed <file> which local deleted```
434
436
435 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
437 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
436 "dirmove".
438 "dirmove".
437
439
438 "copy" is a mapping from destination name -> source name,
440 "copy" is a mapping from destination name -> source name,
439 where source is in c1 and destination is in c2 or vice-versa.
441 where source is in c1 and destination is in c2 or vice-versa.
440
442
441 "movewithdir" is a mapping from source name -> destination name,
443 "movewithdir" is a mapping from source name -> destination name,
442 where the file at source present in one context but not the other
444 where the file at source present in one context but not the other
443 needs to be moved to destination by the merge process, because the
445 needs to be moved to destination by the merge process, because the
444 other context moved the directory it is in.
446 other context moved the directory it is in.
445
447
446 "diverge" is a mapping of source name -> list of destination names
448 "diverge" is a mapping of source name -> list of destination names
447 for divergent renames.
449 for divergent renames.
448
450
449 "renamedelete" is a mapping of source name -> list of destination
451 "renamedelete" is a mapping of source name -> list of destination
450 names for files deleted in c1 that were renamed in c2 or vice-versa.
452 names for files deleted in c1 that were renamed in c2 or vice-versa.
451
453
452 "dirmove" is a mapping of detected source dir -> destination dir renames.
454 "dirmove" is a mapping of detected source dir -> destination dir renames.
453 This is needed for handling changes to new files previously grafted into
455 This is needed for handling changes to new files previously grafted into
454 renamed directories.
456 renamed directories.
455
457
456 This function calls different copytracing algorithms based on config.
458 This function calls different copytracing algorithms based on config.
457 """
459 """
458 # avoid silly behavior for update from empty dir
460 # avoid silly behavior for update from empty dir
459 if not c1 or not c2 or c1 == c2:
461 if not c1 or not c2 or c1 == c2:
460 return {}, {}, {}, {}, {}
462 return {}, {}, {}, {}, {}
461
463
462 narrowmatch = c1.repo().narrowmatch()
464 narrowmatch = c1.repo().narrowmatch()
463
465
464 # avoid silly behavior for parent -> working dir
466 # avoid silly behavior for parent -> working dir
465 if c2.node() is None and c1.node() == repo.dirstate.p1():
467 if c2.node() is None and c1.node() == repo.dirstate.p1():
466 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
468 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
467
469
468 copytracing = repo.ui.config(b'experimental', b'copytrace')
470 copytracing = repo.ui.config(b'experimental', b'copytrace')
469 if stringutil.parsebool(copytracing) is False:
471 if stringutil.parsebool(copytracing) is False:
470 # stringutil.parsebool() returns None when it is unable to parse the
472 # stringutil.parsebool() returns None when it is unable to parse the
471 # value, so we should rely on making sure copytracing is on such cases
473 # value, so we should rely on making sure copytracing is on such cases
472 return {}, {}, {}, {}, {}
474 return {}, {}, {}, {}, {}
473
475
474 if usechangesetcentricalgo(repo):
476 if usechangesetcentricalgo(repo):
475 # The heuristics don't make sense when we need changeset-centric algos
477 # The heuristics don't make sense when we need changeset-centric algos
476 return _fullcopytracing(repo, c1, c2, base)
478 return _fullcopytracing(repo, c1, c2, base)
477
479
478 # Copy trace disabling is explicitly below the node == p1 logic above
480 # Copy trace disabling is explicitly below the node == p1 logic above
479 # because the logic above is required for a simple copy to be kept across a
481 # because the logic above is required for a simple copy to be kept across a
480 # rebase.
482 # rebase.
481 if copytracing == b'heuristics':
483 if copytracing == b'heuristics':
482 # Do full copytracing if only non-public revisions are involved as
484 # Do full copytracing if only non-public revisions are involved as
483 # that will be fast enough and will also cover the copies which could
485 # that will be fast enough and will also cover the copies which could
484 # be missed by heuristics
486 # be missed by heuristics
485 if _isfullcopytraceable(repo, c1, base):
487 if _isfullcopytraceable(repo, c1, base):
486 return _fullcopytracing(repo, c1, c2, base)
488 return _fullcopytracing(repo, c1, c2, base)
487 return _heuristicscopytracing(repo, c1, c2, base)
489 return _heuristicscopytracing(repo, c1, c2, base)
488 else:
490 else:
489 return _fullcopytracing(repo, c1, c2, base)
491 return _fullcopytracing(repo, c1, c2, base)
490
492
491
493
492 def _isfullcopytraceable(repo, c1, base):
494 def _isfullcopytraceable(repo, c1, base):
493 """ Checks that if base, source and destination are all no-public branches,
495 """ Checks that if base, source and destination are all no-public branches,
494 if yes let's use the full copytrace algorithm for increased capabilities
496 if yes let's use the full copytrace algorithm for increased capabilities
495 since it will be fast enough.
497 since it will be fast enough.
496
498
497 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
499 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
498 number of changesets from c1 to base such that if number of changesets are
500 number of changesets from c1 to base such that if number of changesets are
499 more than the limit, full copytracing algorithm won't be used.
501 more than the limit, full copytracing algorithm won't be used.
500 """
502 """
501 if c1.rev() is None:
503 if c1.rev() is None:
502 c1 = c1.p1()
504 c1 = c1.p1()
503 if c1.mutable() and base.mutable():
505 if c1.mutable() and base.mutable():
504 sourcecommitlimit = repo.ui.configint(
506 sourcecommitlimit = repo.ui.configint(
505 b'experimental', b'copytrace.sourcecommitlimit'
507 b'experimental', b'copytrace.sourcecommitlimit'
506 )
508 )
507 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
509 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
508 return commits < sourcecommitlimit
510 return commits < sourcecommitlimit
509 return False
511 return False
510
512
511
513
512 def _checksinglesidecopies(
514 def _checksinglesidecopies(
513 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
515 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
514 ):
516 ):
515 if src not in m2:
517 if src not in m2:
516 # deleted on side 2
518 # deleted on side 2
517 if src not in m1:
519 if src not in m1:
518 # renamed on side 1, deleted on side 2
520 # renamed on side 1, deleted on side 2
519 renamedelete[src] = dsts1
521 renamedelete[src] = dsts1
520 elif m2[src] != mb[src]:
522 elif m2[src] != mb[src]:
521 if not _related(c2[src], base[src]):
523 if not _related(c2[src], base[src]):
522 return
524 return
523 # modified on side 2
525 # modified on side 2
524 for dst in dsts1:
526 for dst in dsts1:
525 if dst not in m2:
527 if dst not in m2:
526 # dst not added on side 2 (handle as regular
528 # dst not added on side 2 (handle as regular
527 # "both created" case in manifestmerge otherwise)
529 # "both created" case in manifestmerge otherwise)
528 copy[dst] = src
530 copy[dst] = src
529
531
530
532
531 def _fullcopytracing(repo, c1, c2, base):
533 def _fullcopytracing(repo, c1, c2, base):
532 """ The full copytracing algorithm which finds all the new files that were
534 """ The full copytracing algorithm which finds all the new files that were
533 added from merge base up to the top commit and for each file it checks if
535 added from merge base up to the top commit and for each file it checks if
534 this file was copied from another file.
536 this file was copied from another file.
535
537
536 This is pretty slow when a lot of changesets are involved but will track all
538 This is pretty slow when a lot of changesets are involved but will track all
537 the copies.
539 the copies.
538 """
540 """
539 m1 = c1.manifest()
541 m1 = c1.manifest()
540 m2 = c2.manifest()
542 m2 = c2.manifest()
541 mb = base.manifest()
543 mb = base.manifest()
542
544
543 copies1 = pathcopies(base, c1)
545 copies1 = pathcopies(base, c1)
544 copies2 = pathcopies(base, c2)
546 copies2 = pathcopies(base, c2)
545
547
546 inversecopies1 = {}
548 inversecopies1 = {}
547 inversecopies2 = {}
549 inversecopies2 = {}
548 for dst, src in copies1.items():
550 for dst, src in copies1.items():
549 inversecopies1.setdefault(src, []).append(dst)
551 inversecopies1.setdefault(src, []).append(dst)
550 for dst, src in copies2.items():
552 for dst, src in copies2.items():
551 inversecopies2.setdefault(src, []).append(dst)
553 inversecopies2.setdefault(src, []).append(dst)
552
554
553 copy = {}
555 copy = {}
554 diverge = {}
556 diverge = {}
555 renamedelete = {}
557 renamedelete = {}
556 allsources = set(inversecopies1) | set(inversecopies2)
558 allsources = set(inversecopies1) | set(inversecopies2)
557 for src in allsources:
559 for src in allsources:
558 dsts1 = inversecopies1.get(src)
560 dsts1 = inversecopies1.get(src)
559 dsts2 = inversecopies2.get(src)
561 dsts2 = inversecopies2.get(src)
560 if dsts1 and dsts2:
562 if dsts1 and dsts2:
561 # copied/renamed on both sides
563 # copied/renamed on both sides
562 if src not in m1 and src not in m2:
564 if src not in m1 and src not in m2:
563 # renamed on both sides
565 # renamed on both sides
564 dsts1 = set(dsts1)
566 dsts1 = set(dsts1)
565 dsts2 = set(dsts2)
567 dsts2 = set(dsts2)
566 # If there's some overlap in the rename destinations, we
568 # If there's some overlap in the rename destinations, we
567 # consider it not divergent. For example, if side 1 copies 'a'
569 # consider it not divergent. For example, if side 1 copies 'a'
568 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
570 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
569 # and 'd' and deletes 'a'.
571 # and 'd' and deletes 'a'.
570 if dsts1 & dsts2:
572 if dsts1 & dsts2:
571 for dst in dsts1 & dsts2:
573 for dst in dsts1 & dsts2:
572 copy[dst] = src
574 copy[dst] = src
573 else:
575 else:
574 diverge[src] = sorted(dsts1 | dsts2)
576 diverge[src] = sorted(dsts1 | dsts2)
575 elif src in m1 and src in m2:
577 elif src in m1 and src in m2:
576 # copied on both sides
578 # copied on both sides
577 dsts1 = set(dsts1)
579 dsts1 = set(dsts1)
578 dsts2 = set(dsts2)
580 dsts2 = set(dsts2)
579 for dst in dsts1 & dsts2:
581 for dst in dsts1 & dsts2:
580 copy[dst] = src
582 copy[dst] = src
581 # TODO: Handle cases where it was renamed on one side and copied
583 # TODO: Handle cases where it was renamed on one side and copied
582 # on the other side
584 # on the other side
583 elif dsts1:
585 elif dsts1:
584 # copied/renamed only on side 1
586 # copied/renamed only on side 1
585 _checksinglesidecopies(
587 _checksinglesidecopies(
586 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
588 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
587 )
589 )
588 elif dsts2:
590 elif dsts2:
589 # copied/renamed only on side 2
591 # copied/renamed only on side 2
590 _checksinglesidecopies(
592 _checksinglesidecopies(
591 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
593 src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
592 )
594 )
593
595
594 renamedeleteset = set()
596 renamedeleteset = set()
595 divergeset = set()
597 divergeset = set()
596 for dsts in diverge.values():
598 for dsts in diverge.values():
597 divergeset.update(dsts)
599 divergeset.update(dsts)
598 for dsts in renamedelete.values():
600 for dsts in renamedelete.values():
599 renamedeleteset.update(dsts)
601 renamedeleteset.update(dsts)
600
602
601 # find interesting file sets from manifests
603 # find interesting file sets from manifests
602 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
604 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
603 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
605 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
604 u1 = sorted(addedinm1 - addedinm2)
606 u1 = sorted(addedinm1 - addedinm2)
605 u2 = sorted(addedinm2 - addedinm1)
607 u2 = sorted(addedinm2 - addedinm1)
606
608
607 header = b" unmatched files in %s"
609 header = b" unmatched files in %s"
608 if u1:
610 if u1:
609 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
611 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
610 if u2:
612 if u2:
611 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
613 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
612
614
613 fullcopy = copies1.copy()
615 fullcopy = copies1.copy()
614 fullcopy.update(copies2)
616 fullcopy.update(copies2)
615 if not fullcopy:
617 if not fullcopy:
616 return copy, {}, diverge, renamedelete, {}
618 return copy, {}, diverge, renamedelete, {}
617
619
618 if repo.ui.debugflag:
620 if repo.ui.debugflag:
619 repo.ui.debug(
621 repo.ui.debug(
620 b" all copies found (* = to merge, ! = divergent, "
622 b" all copies found (* = to merge, ! = divergent, "
621 b"% = renamed and deleted):\n"
623 b"% = renamed and deleted):\n"
622 )
624 )
623 for f in sorted(fullcopy):
625 for f in sorted(fullcopy):
624 note = b""
626 note = b""
625 if f in copy:
627 if f in copy:
626 note += b"*"
628 note += b"*"
627 if f in divergeset:
629 if f in divergeset:
628 note += b"!"
630 note += b"!"
629 if f in renamedeleteset:
631 if f in renamedeleteset:
630 note += b"%"
632 note += b"%"
631 repo.ui.debug(
633 repo.ui.debug(
632 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
634 b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
633 )
635 )
634 del divergeset
636 del divergeset
635
637
636 repo.ui.debug(b" checking for directory renames\n")
638 repo.ui.debug(b" checking for directory renames\n")
637
639
638 # generate a directory move map
640 # generate a directory move map
639 d1, d2 = c1.dirs(), c2.dirs()
641 d1, d2 = c1.dirs(), c2.dirs()
640 invalid = set()
642 invalid = set()
641 dirmove = {}
643 dirmove = {}
642
644
643 # examine each file copy for a potential directory move, which is
645 # examine each file copy for a potential directory move, which is
644 # when all the files in a directory are moved to a new directory
646 # when all the files in a directory are moved to a new directory
645 for dst, src in pycompat.iteritems(fullcopy):
647 for dst, src in pycompat.iteritems(fullcopy):
646 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
648 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
647 if dsrc in invalid:
649 if dsrc in invalid:
648 # already seen to be uninteresting
650 # already seen to be uninteresting
649 continue
651 continue
650 elif dsrc in d1 and ddst in d1:
652 elif dsrc in d1 and ddst in d1:
651 # directory wasn't entirely moved locally
653 # directory wasn't entirely moved locally
652 invalid.add(dsrc)
654 invalid.add(dsrc)
653 elif dsrc in d2 and ddst in d2:
655 elif dsrc in d2 and ddst in d2:
654 # directory wasn't entirely moved remotely
656 # directory wasn't entirely moved remotely
655 invalid.add(dsrc)
657 invalid.add(dsrc)
656 elif dsrc in dirmove and dirmove[dsrc] != ddst:
658 elif dsrc in dirmove and dirmove[dsrc] != ddst:
657 # files from the same directory moved to two different places
659 # files from the same directory moved to two different places
658 invalid.add(dsrc)
660 invalid.add(dsrc)
659 else:
661 else:
660 # looks good so far
662 # looks good so far
661 dirmove[dsrc] = ddst
663 dirmove[dsrc] = ddst
662
664
663 for i in invalid:
665 for i in invalid:
664 if i in dirmove:
666 if i in dirmove:
665 del dirmove[i]
667 del dirmove[i]
666 del d1, d2, invalid
668 del d1, d2, invalid
667
669
668 if not dirmove:
670 if not dirmove:
669 return copy, {}, diverge, renamedelete, {}
671 return copy, {}, diverge, renamedelete, {}
670
672
671 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
673 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
672
674
673 for d in dirmove:
675 for d in dirmove:
674 repo.ui.debug(
676 repo.ui.debug(
675 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
677 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
676 )
678 )
677
679
678 movewithdir = {}
680 movewithdir = {}
679 # check unaccounted nonoverlapping files against directory moves
681 # check unaccounted nonoverlapping files against directory moves
680 for f in u1 + u2:
682 for f in u1 + u2:
681 if f not in fullcopy:
683 if f not in fullcopy:
682 for d in dirmove:
684 for d in dirmove:
683 if f.startswith(d):
685 if f.startswith(d):
684 # new file added in a directory that was moved, move it
686 # new file added in a directory that was moved, move it
685 df = dirmove[d] + f[len(d) :]
687 df = dirmove[d] + f[len(d) :]
686 if df not in copy:
688 if df not in copy:
687 movewithdir[f] = df
689 movewithdir[f] = df
688 repo.ui.debug(
690 repo.ui.debug(
689 b" pending file src: '%s' -> dst: '%s'\n"
691 b" pending file src: '%s' -> dst: '%s'\n"
690 % (f, df)
692 % (f, df)
691 )
693 )
692 break
694 break
693
695
694 return copy, movewithdir, diverge, renamedelete, dirmove
696 return copy, movewithdir, diverge, renamedelete, dirmove
695
697
696
698
697 def _heuristicscopytracing(repo, c1, c2, base):
699 def _heuristicscopytracing(repo, c1, c2, base):
698 """ Fast copytracing using filename heuristics
700 """ Fast copytracing using filename heuristics
699
701
700 Assumes that moves or renames are of following two types:
702 Assumes that moves or renames are of following two types:
701
703
702 1) Inside a directory only (same directory name but different filenames)
704 1) Inside a directory only (same directory name but different filenames)
703 2) Move from one directory to another
705 2) Move from one directory to another
704 (same filenames but different directory names)
706 (same filenames but different directory names)
705
707
706 Works only when there are no merge commits in the "source branch".
708 Works only when there are no merge commits in the "source branch".
707 Source branch is commits from base up to c2 not including base.
709 Source branch is commits from base up to c2 not including base.
708
710
709 If merge is involved it fallbacks to _fullcopytracing().
711 If merge is involved it fallbacks to _fullcopytracing().
710
712
711 Can be used by setting the following config:
713 Can be used by setting the following config:
712
714
713 [experimental]
715 [experimental]
714 copytrace = heuristics
716 copytrace = heuristics
715
717
716 In some cases the copy/move candidates found by heuristics can be very large
718 In some cases the copy/move candidates found by heuristics can be very large
717 in number and that will make the algorithm slow. The number of possible
719 in number and that will make the algorithm slow. The number of possible
718 candidates to check can be limited by using the config
720 candidates to check can be limited by using the config
719 `experimental.copytrace.movecandidateslimit` which defaults to 100.
721 `experimental.copytrace.movecandidateslimit` which defaults to 100.
720 """
722 """
721
723
722 if c1.rev() is None:
724 if c1.rev() is None:
723 c1 = c1.p1()
725 c1 = c1.p1()
724 if c2.rev() is None:
726 if c2.rev() is None:
725 c2 = c2.p1()
727 c2 = c2.p1()
726
728
727 copies = {}
729 copies = {}
728
730
729 changedfiles = set()
731 changedfiles = set()
730 m1 = c1.manifest()
732 m1 = c1.manifest()
731 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
733 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
732 # If base is not in c2 branch, we switch to fullcopytracing
734 # If base is not in c2 branch, we switch to fullcopytracing
733 repo.ui.debug(
735 repo.ui.debug(
734 b"switching to full copytracing as base is not "
736 b"switching to full copytracing as base is not "
735 b"an ancestor of c2\n"
737 b"an ancestor of c2\n"
736 )
738 )
737 return _fullcopytracing(repo, c1, c2, base)
739 return _fullcopytracing(repo, c1, c2, base)
738
740
739 ctx = c2
741 ctx = c2
740 while ctx != base:
742 while ctx != base:
741 if len(ctx.parents()) == 2:
743 if len(ctx.parents()) == 2:
742 # To keep things simple let's not handle merges
744 # To keep things simple let's not handle merges
743 repo.ui.debug(b"switching to full copytracing because of merges\n")
745 repo.ui.debug(b"switching to full copytracing because of merges\n")
744 return _fullcopytracing(repo, c1, c2, base)
746 return _fullcopytracing(repo, c1, c2, base)
745 changedfiles.update(ctx.files())
747 changedfiles.update(ctx.files())
746 ctx = ctx.p1()
748 ctx = ctx.p1()
747
749
748 cp = _forwardcopies(base, c2)
750 cp = _forwardcopies(base, c2)
749 for dst, src in pycompat.iteritems(cp):
751 for dst, src in pycompat.iteritems(cp):
750 if src in m1:
752 if src in m1:
751 copies[dst] = src
753 copies[dst] = src
752
754
753 # file is missing if it isn't present in the destination, but is present in
755 # file is missing if it isn't present in the destination, but is present in
754 # the base and present in the source.
756 # the base and present in the source.
755 # Presence in the base is important to exclude added files, presence in the
757 # Presence in the base is important to exclude added files, presence in the
756 # source is important to exclude removed files.
758 # source is important to exclude removed files.
757 filt = lambda f: f not in m1 and f in base and f in c2
759 filt = lambda f: f not in m1 and f in base and f in c2
758 missingfiles = [f for f in changedfiles if filt(f)]
760 missingfiles = [f for f in changedfiles if filt(f)]
759
761
760 if missingfiles:
762 if missingfiles:
761 basenametofilename = collections.defaultdict(list)
763 basenametofilename = collections.defaultdict(list)
762 dirnametofilename = collections.defaultdict(list)
764 dirnametofilename = collections.defaultdict(list)
763
765
764 for f in m1.filesnotin(base.manifest()):
766 for f in m1.filesnotin(base.manifest()):
765 basename = os.path.basename(f)
767 basename = os.path.basename(f)
766 dirname = os.path.dirname(f)
768 dirname = os.path.dirname(f)
767 basenametofilename[basename].append(f)
769 basenametofilename[basename].append(f)
768 dirnametofilename[dirname].append(f)
770 dirnametofilename[dirname].append(f)
769
771
770 for f in missingfiles:
772 for f in missingfiles:
771 basename = os.path.basename(f)
773 basename = os.path.basename(f)
772 dirname = os.path.dirname(f)
774 dirname = os.path.dirname(f)
773 samebasename = basenametofilename[basename]
775 samebasename = basenametofilename[basename]
774 samedirname = dirnametofilename[dirname]
776 samedirname = dirnametofilename[dirname]
775 movecandidates = samebasename + samedirname
777 movecandidates = samebasename + samedirname
776 # f is guaranteed to be present in c2, that's why
778 # f is guaranteed to be present in c2, that's why
777 # c2.filectx(f) won't fail
779 # c2.filectx(f) won't fail
778 f2 = c2.filectx(f)
780 f2 = c2.filectx(f)
779 # we can have a lot of candidates which can slow down the heuristics
781 # we can have a lot of candidates which can slow down the heuristics
780 # config value to limit the number of candidates moves to check
782 # config value to limit the number of candidates moves to check
781 maxcandidates = repo.ui.configint(
783 maxcandidates = repo.ui.configint(
782 b'experimental', b'copytrace.movecandidateslimit'
784 b'experimental', b'copytrace.movecandidateslimit'
783 )
785 )
784
786
785 if len(movecandidates) > maxcandidates:
787 if len(movecandidates) > maxcandidates:
786 repo.ui.status(
788 repo.ui.status(
787 _(
789 _(
788 b"skipping copytracing for '%s', more "
790 b"skipping copytracing for '%s', more "
789 b"candidates than the limit: %d\n"
791 b"candidates than the limit: %d\n"
790 )
792 )
791 % (f, len(movecandidates))
793 % (f, len(movecandidates))
792 )
794 )
793 continue
795 continue
794
796
795 for candidate in movecandidates:
797 for candidate in movecandidates:
796 f1 = c1.filectx(candidate)
798 f1 = c1.filectx(candidate)
797 if _related(f1, f2):
799 if _related(f1, f2):
798 # if there are a few related copies then we'll merge
800 # if there are a few related copies then we'll merge
799 # changes into all of them. This matches the behaviour
801 # changes into all of them. This matches the behaviour
800 # of upstream copytracing
802 # of upstream copytracing
801 copies[candidate] = f
803 copies[candidate] = f
802
804
803 return copies, {}, {}, {}, {}
805 return copies, {}, {}, {}, {}
804
806
805
807
806 def _related(f1, f2):
808 def _related(f1, f2):
807 """return True if f1 and f2 filectx have a common ancestor
809 """return True if f1 and f2 filectx have a common ancestor
808
810
809 Walk back to common ancestor to see if the two files originate
811 Walk back to common ancestor to see if the two files originate
810 from the same file. Since workingfilectx's rev() is None it messes
812 from the same file. Since workingfilectx's rev() is None it messes
811 up the integer comparison logic, hence the pre-step check for
813 up the integer comparison logic, hence the pre-step check for
812 None (f1 and f2 can only be workingfilectx's initially).
814 None (f1 and f2 can only be workingfilectx's initially).
813 """
815 """
814
816
815 if f1 == f2:
817 if f1 == f2:
816 return True # a match
818 return True # a match
817
819
818 g1, g2 = f1.ancestors(), f2.ancestors()
820 g1, g2 = f1.ancestors(), f2.ancestors()
819 try:
821 try:
820 f1r, f2r = f1.linkrev(), f2.linkrev()
822 f1r, f2r = f1.linkrev(), f2.linkrev()
821
823
822 if f1r is None:
824 if f1r is None:
823 f1 = next(g1)
825 f1 = next(g1)
824 if f2r is None:
826 if f2r is None:
825 f2 = next(g2)
827 f2 = next(g2)
826
828
827 while True:
829 while True:
828 f1r, f2r = f1.linkrev(), f2.linkrev()
830 f1r, f2r = f1.linkrev(), f2.linkrev()
829 if f1r > f2r:
831 if f1r > f2r:
830 f1 = next(g1)
832 f1 = next(g1)
831 elif f2r > f1r:
833 elif f2r > f1r:
832 f2 = next(g2)
834 f2 = next(g2)
833 else: # f1 and f2 point to files in the same linkrev
835 else: # f1 and f2 point to files in the same linkrev
834 return f1 == f2 # true if they point to the same file
836 return f1 == f2 # true if they point to the same file
835 except StopIteration:
837 except StopIteration:
836 return False
838 return False
837
839
838
840
839 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
841 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
840 """reproduce copies from fromrev to rev in the dirstate
842 """reproduce copies from fromrev to rev in the dirstate
841
843
842 If skiprev is specified, it's a revision that should be used to
844 If skiprev is specified, it's a revision that should be used to
843 filter copy records. Any copies that occur between fromrev and
845 filter copy records. Any copies that occur between fromrev and
844 skiprev will not be duplicated, even if they appear in the set of
846 skiprev will not be duplicated, even if they appear in the set of
845 copies between fromrev and rev.
847 copies between fromrev and rev.
846 """
848 """
847 exclude = {}
849 exclude = {}
848 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
850 ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
849 bctrace = stringutil.parsebool(ctraceconfig)
851 bctrace = stringutil.parsebool(ctraceconfig)
850 if skiprev is not None and (
852 if skiprev is not None and (
851 ctraceconfig == b'heuristics' or bctrace or bctrace is None
853 ctraceconfig == b'heuristics' or bctrace or bctrace is None
852 ):
854 ):
853 # copytrace='off' skips this line, but not the entire function because
855 # copytrace='off' skips this line, but not the entire function because
854 # the line below is O(size of the repo) during a rebase, while the rest
856 # the line below is O(size of the repo) during a rebase, while the rest
855 # of the function is much faster (and is required for carrying copy
857 # of the function is much faster (and is required for carrying copy
856 # metadata across the rebase anyway).
858 # metadata across the rebase anyway).
857 exclude = pathcopies(repo[fromrev], repo[skiprev])
859 exclude = pathcopies(repo[fromrev], repo[skiprev])
858 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
860 for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
859 if dst in exclude:
861 if dst in exclude:
860 continue
862 continue
861 if dst in wctx:
863 if dst in wctx:
862 wctx[dst].markcopied(src)
864 wctx[dst].markcopied(src)
863
865
864
866
865 def computechangesetfilesadded(ctx):
867 def computechangesetfilesadded(ctx):
866 """return the list of files added in a changeset
868 """return the list of files added in a changeset
867 """
869 """
868 added = []
870 added = []
869 for f in ctx.files():
871 for f in ctx.files():
870 if not any(f in p for p in ctx.parents()):
872 if not any(f in p for p in ctx.parents()):
871 added.append(f)
873 added.append(f)
872 return added
874 return added
873
875
874
876
875 def computechangesetfilesremoved(ctx):
877 def computechangesetfilesremoved(ctx):
876 """return the list of files removed in a changeset
878 """return the list of files removed in a changeset
877 """
879 """
878 removed = []
880 removed = []
879 for f in ctx.files():
881 for f in ctx.files():
880 if f not in ctx:
882 if f not in ctx:
881 removed.append(f)
883 removed.append(f)
882 return removed
884 return removed
883
885
884
886
885 def computechangesetcopies(ctx):
887 def computechangesetcopies(ctx):
886 """return the copies data for a changeset
888 """return the copies data for a changeset
887
889
888 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
890 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
889
891
890 Each dictionnary are in the form: `{newname: oldname}`
892 Each dictionnary are in the form: `{newname: oldname}`
891 """
893 """
892 p1copies = {}
894 p1copies = {}
893 p2copies = {}
895 p2copies = {}
894 p1 = ctx.p1()
896 p1 = ctx.p1()
895 p2 = ctx.p2()
897 p2 = ctx.p2()
896 narrowmatch = ctx._repo.narrowmatch()
898 narrowmatch = ctx._repo.narrowmatch()
897 for dst in ctx.files():
899 for dst in ctx.files():
898 if not narrowmatch(dst) or dst not in ctx:
900 if not narrowmatch(dst) or dst not in ctx:
899 continue
901 continue
900 copied = ctx[dst].renamed()
902 copied = ctx[dst].renamed()
901 if not copied:
903 if not copied:
902 continue
904 continue
903 src, srcnode = copied
905 src, srcnode = copied
904 if src in p1 and p1[src].filenode() == srcnode:
906 if src in p1 and p1[src].filenode() == srcnode:
905 p1copies[dst] = src
907 p1copies[dst] = src
906 elif src in p2 and p2[src].filenode() == srcnode:
908 elif src in p2 and p2[src].filenode() == srcnode:
907 p2copies[dst] = src
909 p2copies[dst] = src
908 return p1copies, p2copies
910 return p1copies, p2copies
909
911
910
912
911 def encodecopies(files, copies):
913 def encodecopies(files, copies):
912 items = []
914 items = []
913 for i, dst in enumerate(files):
915 for i, dst in enumerate(files):
914 if dst in copies:
916 if dst in copies:
915 items.append(b'%d\0%s' % (i, copies[dst]))
917 items.append(b'%d\0%s' % (i, copies[dst]))
916 if len(items) != len(copies):
918 if len(items) != len(copies):
917 raise error.ProgrammingError(
919 raise error.ProgrammingError(
918 b'some copy targets missing from file list'
920 b'some copy targets missing from file list'
919 )
921 )
920 return b"\n".join(items)
922 return b"\n".join(items)
921
923
922
924
923 def decodecopies(files, data):
925 def decodecopies(files, data):
924 try:
926 try:
925 copies = {}
927 copies = {}
926 if not data:
928 if not data:
927 return copies
929 return copies
928 for l in data.split(b'\n'):
930 for l in data.split(b'\n'):
929 strindex, src = l.split(b'\0')
931 strindex, src = l.split(b'\0')
930 i = int(strindex)
932 i = int(strindex)
931 dst = files[i]
933 dst = files[i]
932 copies[dst] = src
934 copies[dst] = src
933 return copies
935 return copies
934 except (ValueError, IndexError):
936 except (ValueError, IndexError):
935 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
937 # Perhaps someone had chosen the same key name (e.g. "p1copies") and
936 # used different syntax for the value.
938 # used different syntax for the value.
937 return None
939 return None
938
940
939
941
940 def encodefileindices(files, subset):
942 def encodefileindices(files, subset):
941 subset = set(subset)
943 subset = set(subset)
942 indices = []
944 indices = []
943 for i, f in enumerate(files):
945 for i, f in enumerate(files):
944 if f in subset:
946 if f in subset:
945 indices.append(b'%d' % i)
947 indices.append(b'%d' % i)
946 return b'\n'.join(indices)
948 return b'\n'.join(indices)
947
949
948
950
949 def decodefileindices(files, data):
951 def decodefileindices(files, data):
950 try:
952 try:
951 subset = []
953 subset = []
952 if not data:
954 if not data:
953 return subset
955 return subset
954 for strindex in data.split(b'\n'):
956 for strindex in data.split(b'\n'):
955 i = int(strindex)
957 i = int(strindex)
956 if i < 0 or i >= len(files):
958 if i < 0 or i >= len(files):
957 return None
959 return None
958 subset.append(files[i])
960 subset.append(files[i])
959 return subset
961 return subset
960 except (ValueError, IndexError):
962 except (ValueError, IndexError):
961 # Perhaps someone had chosen the same key name (e.g. "added") and
963 # Perhaps someone had chosen the same key name (e.g. "added") and
962 # used different syntax for the value.
964 # used different syntax for the value.
963 return None
965 return None
964
966
965
967
966 def _getsidedata(srcrepo, rev):
968 def _getsidedata(srcrepo, rev):
967 ctx = srcrepo[rev]
969 ctx = srcrepo[rev]
968 filescopies = computechangesetcopies(ctx)
970 filescopies = computechangesetcopies(ctx)
969 filesadded = computechangesetfilesadded(ctx)
971 filesadded = computechangesetfilesadded(ctx)
970 filesremoved = computechangesetfilesremoved(ctx)
972 filesremoved = computechangesetfilesremoved(ctx)
971 sidedata = {}
973 sidedata = {}
972 if any([filescopies, filesadded, filesremoved]):
974 if any([filescopies, filesadded, filesremoved]):
973 sortedfiles = sorted(ctx.files())
975 sortedfiles = sorted(ctx.files())
974 p1copies, p2copies = filescopies
976 p1copies, p2copies = filescopies
975 p1copies = encodecopies(sortedfiles, p1copies)
977 p1copies = encodecopies(sortedfiles, p1copies)
976 p2copies = encodecopies(sortedfiles, p2copies)
978 p2copies = encodecopies(sortedfiles, p2copies)
977 filesadded = encodefileindices(sortedfiles, filesadded)
979 filesadded = encodefileindices(sortedfiles, filesadded)
978 filesremoved = encodefileindices(sortedfiles, filesremoved)
980 filesremoved = encodefileindices(sortedfiles, filesremoved)
979 sidedata[sidedatamod.SD_P1COPIES] = p1copies
981 sidedata[sidedatamod.SD_P1COPIES] = p1copies
980 sidedata[sidedatamod.SD_P2COPIES] = p2copies
982 sidedata[sidedatamod.SD_P2COPIES] = p2copies
981 sidedata[sidedatamod.SD_FILESADDED] = filesadded
983 sidedata[sidedatamod.SD_FILESADDED] = filesadded
982 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
984 sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
983 return sidedata
985 return sidedata
984
986
985
987
986 def getsidedataadder(srcrepo, destrepo):
988 def getsidedataadder(srcrepo, destrepo):
987 def sidedatacompanion(revlog, rev):
989 def sidedatacompanion(revlog, rev):
988 sidedata = {}
990 sidedata = {}
989 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
991 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
990 sidedata = _getsidedata(srcrepo, rev)
992 sidedata = _getsidedata(srcrepo, rev)
991 return False, (), sidedata
993 return False, (), sidedata
992
994
993 return sidedatacompanion
995 return sidedatacompanion
994
996
995
997
996 def getsidedataremover(srcrepo, destrepo):
998 def getsidedataremover(srcrepo, destrepo):
997 def sidedatacompanion(revlog, rev):
999 def sidedatacompanion(revlog, rev):
998 f = ()
1000 f = ()
999 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1001 if util.safehasattr(revlog, 'filteredrevs'): # this is a changelog
1000 if revlog.flags(rev) & REVIDX_SIDEDATA:
1002 if revlog.flags(rev) & REVIDX_SIDEDATA:
1001 f = (
1003 f = (
1002 sidedatamod.SD_P1COPIES,
1004 sidedatamod.SD_P1COPIES,
1003 sidedatamod.SD_P2COPIES,
1005 sidedatamod.SD_P2COPIES,
1004 sidedatamod.SD_FILESADDED,
1006 sidedatamod.SD_FILESADDED,
1005 sidedatamod.SD_FILESREMOVED,
1007 sidedatamod.SD_FILESREMOVED,
1006 )
1008 )
1007 return False, f, {}
1009 return False, f, {}
1008
1010
1009 return sidedatacompanion
1011 return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now