##// END OF EJS Templates
sidedatacopies: only fetch information once for merge...
marmoute -
r43595:90213d02 default
parent child Browse files
Show More
@@ -1,986 +1,1017 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
66 def _chain(a, b):
66 def _chain(a, b):
67 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
68 t = a.copy()
68 t = a.copy()
69 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
70 if v in t:
70 if v in t:
71 t[k] = t[v]
71 t[k] = t[v]
72 else:
72 else:
73 t[k] = v
73 t[k] = v
74 return t
74 return t
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
112 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
113 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
114 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
115 return True
115 return True
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
118 return readfrom in changesetsource
118 return readfrom in changesetsource
119
119
120
120
121 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
124 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
125 repo = a._repo
125 repo = a._repo
126
126
127 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
128 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
129
129
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 dbg = repo.ui.debug
131 dbg = repo.ui.debug
132 if debug:
132 if debug:
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 am = a.manifest()
134 am = a.manifest()
135 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
136
136
137 # find where new files came from
137 # find where new files came from
138 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
139 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 cm = {}
140 cm = {}
141
141
142 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
143 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
144 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
145 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
146 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
147 # this comparison.
147 # this comparison.
148 forwardmissingmatch = match
148 forwardmissingmatch = match
149 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
150 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153
153
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155
155
156 if debug:
156 if debug:
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158
158
159 for f in sorted(missing):
159 for f in sorted(missing):
160 if debug:
160 if debug:
161 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
162 fctx = b[f]
162 fctx = b[f]
163 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
164
164
165 if debug:
165 if debug:
166 start = util.timer()
166 start = util.timer()
167 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
168 if opath:
168 if opath:
169 if debug:
169 if debug:
170 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
171 cm[f] = opath
171 cm[f] = opath
172 if debug:
172 if debug:
173 dbg(
173 dbg(
174 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
175 % (util.timer() - start)
175 % (util.timer() - start)
176 )
176 )
177 return cm
177 return cm
178
178
179
179
180 def _revinfogetter(repo):
180 def _revinfogetter(repo):
181 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
182
182
183 * p1: revision number of first parent
183 * p1: revision number of first parent
184 * p2: revision number of first parent
184 * p2: revision number of first parent
185 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
186 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
187 * removed: a list of removed files
187 * removed: a list of removed files
188 """
188 """
189 cl = repo.changelog
189 cl = repo.changelog
190 parents = cl.parentrevs
190 parents = cl.parentrevs
191
191
192 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
193 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
194 flags = cl.flags
194 flags = cl.flags
195
195
196 # A small cache to avoid doing the work twice for merges
197 #
198 # In the vast majority of cases, if we ask information for a revision
199 # about 1 parent, we'll later ask it for the other. So it make sense to
200 # keep the information around when reaching the first parent of a merge
201 # and dropping it after it was provided for the second parents.
202 #
203 # It exists cases were only one parent of the merge will be walked. It
204 # happens when the "destination" the copy tracing is descendant from a
205 # new root, not common with the "source". In that case, we will only walk
206 # through merge parents that are descendant of changesets common
207 # between "source" and "destination".
208 #
209 # With the current case implementation if such changesets have a copy
210 # information, we'll keep them in memory until the end of
211 # _changesetforwardcopies. We don't expect the case to be frequent
212 # enough to matters.
213 #
214 # In addition, it would be possible to reach pathological case, were
215 # many first parent are met before any second parent is reached. In
216 # that case the cache could grow. If this even become an issue one can
217 # safely introduce a maximum cache size. This would trade extra CPU/IO
218 # time to save memory.
219 merge_caches = {}
220
196 def revinfo(rev):
221 def revinfo(rev):
197 p1, p2 = parents(rev)
222 p1, p2 = parents(rev)
198 if flags(rev) & REVIDX_SIDEDATA:
223 if flags(rev) & REVIDX_SIDEDATA:
224 e = merge_caches.pop(rev, None)
225 if e is not None:
226 return e
199 c = changelogrevision(rev)
227 c = changelogrevision(rev)
200 p1copies = c.p1copies
228 p1copies = c.p1copies
201 p2copies = c.p2copies
229 p2copies = c.p2copies
202 removed = c.filesremoved
230 removed = c.filesremoved
231 if p1 != node.nullrev and p2 != node.nullrev:
232 # XXX some case we over cache, IGNORE
233 merge_caches[rev] = (p1, p2, p1copies, p2copies, removed)
203 else:
234 else:
204 p1copies = {}
235 p1copies = {}
205 p2copies = {}
236 p2copies = {}
206 removed = ()
237 removed = ()
207 return p1, p2, p1copies, p2copies, removed
238 return p1, p2, p1copies, p2copies, removed
208
239
209 else:
240 else:
210
241
211 def revinfo(rev):
242 def revinfo(rev):
212 p1, p2 = parents(rev)
243 p1, p2 = parents(rev)
213 ctx = repo[rev]
244 ctx = repo[rev]
214 p1copies, p2copies = ctx._copies
245 p1copies, p2copies = ctx._copies
215 removed = ctx.filesremoved()
246 removed = ctx.filesremoved()
216 return p1, p2, p1copies, p2copies, removed
247 return p1, p2, p1copies, p2copies, removed
217
248
218 return revinfo
249 return revinfo
219
250
220
251
221 def _changesetforwardcopies(a, b, match):
252 def _changesetforwardcopies(a, b, match):
222 if a.rev() in (node.nullrev, b.rev()):
253 if a.rev() in (node.nullrev, b.rev()):
223 return {}
254 return {}
224
255
225 repo = a.repo().unfiltered()
256 repo = a.repo().unfiltered()
226 children = {}
257 children = {}
227 revinfo = _revinfogetter(repo)
258 revinfo = _revinfogetter(repo)
228
259
229 cl = repo.changelog
260 cl = repo.changelog
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
261 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
262 mrset = set(missingrevs)
232 roots = set()
263 roots = set()
233 for r in missingrevs:
264 for r in missingrevs:
234 for p in cl.parentrevs(r):
265 for p in cl.parentrevs(r):
235 if p == node.nullrev:
266 if p == node.nullrev:
236 continue
267 continue
237 if p not in children:
268 if p not in children:
238 children[p] = [r]
269 children[p] = [r]
239 else:
270 else:
240 children[p].append(r)
271 children[p].append(r)
241 if p not in mrset:
272 if p not in mrset:
242 roots.add(p)
273 roots.add(p)
243 if not roots:
274 if not roots:
244 # no common revision to track copies from
275 # no common revision to track copies from
245 return {}
276 return {}
246 min_root = min(roots)
277 min_root = min(roots)
247
278
248 from_head = set(
279 from_head = set(
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
280 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
281 )
251
282
252 iterrevs = set(from_head)
283 iterrevs = set(from_head)
253 iterrevs &= mrset
284 iterrevs &= mrset
254 iterrevs.update(roots)
285 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
286 iterrevs.remove(b.rev())
256 all_copies = {r: {} for r in roots}
287 all_copies = {r: {} for r in roots}
257 alwaysmatch = match.always()
288 alwaysmatch = match.always()
258 for r in sorted(iterrevs):
289 for r in sorted(iterrevs):
259 copies = all_copies.pop(r)
290 copies = all_copies.pop(r)
260 for i, c in enumerate(children[r]):
291 for i, c in enumerate(children[r]):
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
292 p1, p2, p1copies, p2copies, removed = revinfo(c)
262 if r == p1:
293 if r == p1:
263 parent = 1
294 parent = 1
264 childcopies = p1copies
295 childcopies = p1copies
265 else:
296 else:
266 assert r == p2
297 assert r == p2
267 parent = 2
298 parent = 2
268 childcopies = p2copies
299 childcopies = p2copies
269 if not alwaysmatch:
300 if not alwaysmatch:
270 childcopies = {
301 childcopies = {
271 dst: src for dst, src in childcopies.items() if match(dst)
302 dst: src for dst, src in childcopies.items() if match(dst)
272 }
303 }
273 newcopies = copies
304 newcopies = copies
274 if childcopies:
305 if childcopies:
275 newcopies = _chain(newcopies, childcopies)
306 newcopies = _chain(newcopies, childcopies)
276 # _chain makes a copies, we can avoid doing so in some
307 # _chain makes a copies, we can avoid doing so in some
277 # simple/linear cases.
308 # simple/linear cases.
278 assert newcopies is not copies
309 assert newcopies is not copies
279 for f in removed:
310 for f in removed:
280 if f in newcopies:
311 if f in newcopies:
281 if newcopies is copies:
312 if newcopies is copies:
282 # copy on write to avoid affecting potential other
313 # copy on write to avoid affecting potential other
283 # branches. when there are no other branches, this
314 # branches. when there are no other branches, this
284 # could be avoided.
315 # could be avoided.
285 newcopies = copies.copy()
316 newcopies = copies.copy()
286 del newcopies[f]
317 del newcopies[f]
287 othercopies = all_copies.get(c)
318 othercopies = all_copies.get(c)
288 if othercopies is None:
319 if othercopies is None:
289 all_copies[c] = newcopies
320 all_copies[c] = newcopies
290 else:
321 else:
291 # we are the second parent to work on c, we need to merge our
322 # we are the second parent to work on c, we need to merge our
292 # work with the other.
323 # work with the other.
293 #
324 #
294 # Unlike when copies are stored in the filelog, we consider
325 # Unlike when copies are stored in the filelog, we consider
295 # it a copy even if the destination already existed on the
326 # it a copy even if the destination already existed on the
296 # other branch. It's simply too expensive to check if the
327 # other branch. It's simply too expensive to check if the
297 # file existed in the manifest.
328 # file existed in the manifest.
298 #
329 #
299 # In case of conflict, parent 1 take precedence over parent 2.
330 # In case of conflict, parent 1 take precedence over parent 2.
300 # This is an arbitrary choice made anew when implementing
331 # This is an arbitrary choice made anew when implementing
301 # changeset based copies. It was made without regards with
332 # changeset based copies. It was made without regards with
302 # potential filelog related behavior.
333 # potential filelog related behavior.
303 if parent == 1:
334 if parent == 1:
304 othercopies.update(newcopies)
335 othercopies.update(newcopies)
305 else:
336 else:
306 newcopies.update(othercopies)
337 newcopies.update(othercopies)
307 all_copies[c] = newcopies
338 all_copies[c] = newcopies
308 return all_copies[b.rev()]
339 return all_copies[b.rev()]
309
340
310
341
311 def _forwardcopies(a, b, base=None, match=None):
342 def _forwardcopies(a, b, base=None, match=None):
312 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
343 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
313
344
314 if base is None:
345 if base is None:
315 base = a
346 base = a
316 match = a.repo().narrowmatch(match)
347 match = a.repo().narrowmatch(match)
317 # check for working copy
348 # check for working copy
318 if b.rev() is None:
349 if b.rev() is None:
319 cm = _committedforwardcopies(a, b.p1(), base, match)
350 cm = _committedforwardcopies(a, b.p1(), base, match)
320 # combine copies from dirstate if necessary
351 # combine copies from dirstate if necessary
321 copies = _chain(cm, _dirstatecopies(b._repo, match))
352 copies = _chain(cm, _dirstatecopies(b._repo, match))
322 else:
353 else:
323 copies = _committedforwardcopies(a, b, base, match)
354 copies = _committedforwardcopies(a, b, base, match)
324 return copies
355 return copies
325
356
326
357
327 def _backwardrenames(a, b, match):
358 def _backwardrenames(a, b, match):
328 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
359 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
329 return {}
360 return {}
330
361
331 # Even though we're not taking copies into account, 1:n rename situations
362 # Even though we're not taking copies into account, 1:n rename situations
332 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
363 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
333 # arbitrarily pick one of the renames.
364 # arbitrarily pick one of the renames.
334 # We don't want to pass in "match" here, since that would filter
365 # We don't want to pass in "match" here, since that would filter
335 # the destination by it. Since we're reversing the copies, we want
366 # the destination by it. Since we're reversing the copies, we want
336 # to filter the source instead.
367 # to filter the source instead.
337 f = _forwardcopies(b, a)
368 f = _forwardcopies(b, a)
338 r = {}
369 r = {}
339 for k, v in sorted(pycompat.iteritems(f)):
370 for k, v in sorted(pycompat.iteritems(f)):
340 if match and not match(v):
371 if match and not match(v):
341 continue
372 continue
342 # remove copies
373 # remove copies
343 if v in a:
374 if v in a:
344 continue
375 continue
345 r[v] = k
376 r[v] = k
346 return r
377 return r
347
378
348
379
349 def pathcopies(x, y, match=None):
380 def pathcopies(x, y, match=None):
350 """find {dst@y: src@x} copy mapping for directed compare"""
381 """find {dst@y: src@x} copy mapping for directed compare"""
351 repo = x._repo
382 repo = x._repo
352 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
383 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
353 if debug:
384 if debug:
354 repo.ui.debug(
385 repo.ui.debug(
355 b'debug.copies: searching copies from %s to %s\n' % (x, y)
386 b'debug.copies: searching copies from %s to %s\n' % (x, y)
356 )
387 )
357 if x == y or not x or not y:
388 if x == y or not x or not y:
358 return {}
389 return {}
359 a = y.ancestor(x)
390 a = y.ancestor(x)
360 if a == x:
391 if a == x:
361 if debug:
392 if debug:
362 repo.ui.debug(b'debug.copies: search mode: forward\n')
393 repo.ui.debug(b'debug.copies: search mode: forward\n')
363 if y.rev() is None and x == y.p1():
394 if y.rev() is None and x == y.p1():
364 # short-circuit to avoid issues with merge states
395 # short-circuit to avoid issues with merge states
365 return _dirstatecopies(repo, match)
396 return _dirstatecopies(repo, match)
366 copies = _forwardcopies(x, y, match=match)
397 copies = _forwardcopies(x, y, match=match)
367 elif a == y:
398 elif a == y:
368 if debug:
399 if debug:
369 repo.ui.debug(b'debug.copies: search mode: backward\n')
400 repo.ui.debug(b'debug.copies: search mode: backward\n')
370 copies = _backwardrenames(x, y, match=match)
401 copies = _backwardrenames(x, y, match=match)
371 else:
402 else:
372 if debug:
403 if debug:
373 repo.ui.debug(b'debug.copies: search mode: combined\n')
404 repo.ui.debug(b'debug.copies: search mode: combined\n')
374 base = None
405 base = None
375 if a.rev() != node.nullrev:
406 if a.rev() != node.nullrev:
376 base = x
407 base = x
377 copies = _chain(
408 copies = _chain(
378 _backwardrenames(x, a, match=match),
409 _backwardrenames(x, a, match=match),
379 _forwardcopies(a, y, base, match=match),
410 _forwardcopies(a, y, base, match=match),
380 )
411 )
381 _filter(x, y, copies)
412 _filter(x, y, copies)
382 return copies
413 return copies
383
414
384
415
385 def mergecopies(repo, c1, c2, base):
416 def mergecopies(repo, c1, c2, base):
386 """
417 """
387 Finds moves and copies between context c1 and c2 that are relevant for
418 Finds moves and copies between context c1 and c2 that are relevant for
388 merging. 'base' will be used as the merge base.
419 merging. 'base' will be used as the merge base.
389
420
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
421 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
391 files that were moved/ copied in one merge parent and modified in another.
422 files that were moved/ copied in one merge parent and modified in another.
392 For example:
423 For example:
393
424
394 o ---> 4 another commit
425 o ---> 4 another commit
395 |
426 |
396 | o ---> 3 commit that modifies a.txt
427 | o ---> 3 commit that modifies a.txt
397 | /
428 | /
398 o / ---> 2 commit that moves a.txt to b.txt
429 o / ---> 2 commit that moves a.txt to b.txt
399 |/
430 |/
400 o ---> 1 merge base
431 o ---> 1 merge base
401
432
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
433 If we try to rebase revision 3 on revision 4, since there is no a.txt in
403 revision 4, and if user have copytrace disabled, we prints the following
434 revision 4, and if user have copytrace disabled, we prints the following
404 message:
435 message:
405
436
406 ```other changed <file> which local deleted```
437 ```other changed <file> which local deleted```
407
438
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
439 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
409 "dirmove".
440 "dirmove".
410
441
411 "copy" is a mapping from destination name -> source name,
442 "copy" is a mapping from destination name -> source name,
412 where source is in c1 and destination is in c2 or vice-versa.
443 where source is in c1 and destination is in c2 or vice-versa.
413
444
414 "movewithdir" is a mapping from source name -> destination name,
445 "movewithdir" is a mapping from source name -> destination name,
415 where the file at source present in one context but not the other
446 where the file at source present in one context but not the other
416 needs to be moved to destination by the merge process, because the
447 needs to be moved to destination by the merge process, because the
417 other context moved the directory it is in.
448 other context moved the directory it is in.
418
449
419 "diverge" is a mapping of source name -> list of destination names
450 "diverge" is a mapping of source name -> list of destination names
420 for divergent renames.
451 for divergent renames.
421
452
422 "renamedelete" is a mapping of source name -> list of destination
453 "renamedelete" is a mapping of source name -> list of destination
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
454 names for files deleted in c1 that were renamed in c2 or vice-versa.
424
455
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
456 "dirmove" is a mapping of detected source dir -> destination dir renames.
426 This is needed for handling changes to new files previously grafted into
457 This is needed for handling changes to new files previously grafted into
427 renamed directories.
458 renamed directories.
428
459
429 This function calls different copytracing algorithms based on config.
460 This function calls different copytracing algorithms based on config.
430 """
461 """
431 # avoid silly behavior for update from empty dir
462 # avoid silly behavior for update from empty dir
432 if not c1 or not c2 or c1 == c2:
463 if not c1 or not c2 or c1 == c2:
433 return {}, {}, {}, {}, {}
464 return {}, {}, {}, {}, {}
434
465
435 narrowmatch = c1.repo().narrowmatch()
466 narrowmatch = c1.repo().narrowmatch()
436
467
437 # avoid silly behavior for parent -> working dir
468 # avoid silly behavior for parent -> working dir
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
469 if c2.node() is None and c1.node() == repo.dirstate.p1():
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
470 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
440
471
441 copytracing = repo.ui.config(b'experimental', b'copytrace')
472 copytracing = repo.ui.config(b'experimental', b'copytrace')
442 if stringutil.parsebool(copytracing) is False:
473 if stringutil.parsebool(copytracing) is False:
443 # stringutil.parsebool() returns None when it is unable to parse the
474 # stringutil.parsebool() returns None when it is unable to parse the
444 # value, so we should rely on making sure copytracing is on such cases
475 # value, so we should rely on making sure copytracing is on such cases
445 return {}, {}, {}, {}, {}
476 return {}, {}, {}, {}, {}
446
477
447 if usechangesetcentricalgo(repo):
478 if usechangesetcentricalgo(repo):
448 # The heuristics don't make sense when we need changeset-centric algos
479 # The heuristics don't make sense when we need changeset-centric algos
449 return _fullcopytracing(repo, c1, c2, base)
480 return _fullcopytracing(repo, c1, c2, base)
450
481
451 # Copy trace disabling is explicitly below the node == p1 logic above
482 # Copy trace disabling is explicitly below the node == p1 logic above
452 # because the logic above is required for a simple copy to be kept across a
483 # because the logic above is required for a simple copy to be kept across a
453 # rebase.
484 # rebase.
454 if copytracing == b'heuristics':
485 if copytracing == b'heuristics':
455 # Do full copytracing if only non-public revisions are involved as
486 # Do full copytracing if only non-public revisions are involved as
456 # that will be fast enough and will also cover the copies which could
487 # that will be fast enough and will also cover the copies which could
457 # be missed by heuristics
488 # be missed by heuristics
458 if _isfullcopytraceable(repo, c1, base):
489 if _isfullcopytraceable(repo, c1, base):
459 return _fullcopytracing(repo, c1, c2, base)
490 return _fullcopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
491 return _heuristicscopytracing(repo, c1, c2, base)
461 else:
492 else:
462 return _fullcopytracing(repo, c1, c2, base)
493 return _fullcopytracing(repo, c1, c2, base)
463
494
464
495
def _isfullcopytraceable(repo, c1, base):
    """Decide whether full copytracing is affordable for this merge.

    Returns True when both ``c1`` and ``base`` are mutable (i.e. not
    public) and the number of changesets between them stays below
    ``experimental.copytrace.sourcecommitlimit``; in that situation full
    copytracing is fast enough and finds more copies than the
    heuristics would.
    """
    if c1.rev() is None:
        # working context: judge from its first parent instead
        c1 = c1.p1()
    if not (c1.mutable() and base.mutable()):
        # public changesets involved, assume the range may be huge
        return False
    limit = repo.ui.configint(
        b'experimental', b'copytrace.sourcecommitlimit'
    )
    nbcommits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
    return nbcommits < limit
483
514
484
515
485 def _checksinglesidecopies(
516 def _checksinglesidecopies(
486 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
517 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
487 ):
518 ):
488 if src not in m2:
519 if src not in m2:
489 # deleted on side 2
520 # deleted on side 2
490 if src not in m1:
521 if src not in m1:
491 # renamed on side 1, deleted on side 2
522 # renamed on side 1, deleted on side 2
492 renamedelete[src] = dsts1
523 renamedelete[src] = dsts1
493 elif m2[src] != mb[src]:
524 elif m2[src] != mb[src]:
494 if not _related(c2[src], base[src]):
525 if not _related(c2[src], base[src]):
495 return
526 return
496 # modified on side 2
527 # modified on side 2
497 for dst in dsts1:
528 for dst in dsts1:
498 if dst not in m2:
529 if dst not in m2:
499 # dst not added on side 2 (handle as regular
530 # dst not added on side 2 (handle as regular
500 # "both created" case in manifestmerge otherwise)
531 # "both created" case in manifestmerge otherwise)
501 copy[dst] = src
532 copy[dst] = src
502
533
503
534
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``:
    ``copy`` maps destination to source for copies to merge, ``movewithdir``
    maps files to their new location implied by a directory rename,
    ``diverge`` maps a source to the multiple destinations it was renamed to,
    ``renamedelete`` maps a source renamed on one side and deleted on the
    other to its destinations, and ``dirmove`` maps moved directories.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # per-side {destination: source} maps relative to the merge base
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert them into {source: [destinations]} so we can reason per source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flat sets of all destinations involved, only used for debug output
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    # combined {destination: source} map over both sides
    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b"   src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches whole
    # path components only
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
668
699
669
700
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns a 5-tuple in the same shape as _fullcopytracing(), with only
    the first element (the {destination: source} copy map) populated.
    """

    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant, so read the config only once instead of once per
        # missing file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
777
808
778
809
779 def _related(f1, f2):
810 def _related(f1, f2):
780 """return True if f1 and f2 filectx have a common ancestor
811 """return True if f1 and f2 filectx have a common ancestor
781
812
782 Walk back to common ancestor to see if the two files originate
813 Walk back to common ancestor to see if the two files originate
783 from the same file. Since workingfilectx's rev() is None it messes
814 from the same file. Since workingfilectx's rev() is None it messes
784 up the integer comparison logic, hence the pre-step check for
815 up the integer comparison logic, hence the pre-step check for
785 None (f1 and f2 can only be workingfilectx's initially).
816 None (f1 and f2 can only be workingfilectx's initially).
786 """
817 """
787
818
788 if f1 == f2:
819 if f1 == f2:
789 return True # a match
820 return True # a match
790
821
791 g1, g2 = f1.ancestors(), f2.ancestors()
822 g1, g2 = f1.ancestors(), f2.ancestors()
792 try:
823 try:
793 f1r, f2r = f1.linkrev(), f2.linkrev()
824 f1r, f2r = f1.linkrev(), f2.linkrev()
794
825
795 if f1r is None:
826 if f1r is None:
796 f1 = next(g1)
827 f1 = next(g1)
797 if f2r is None:
828 if f2r is None:
798 f2 = next(g2)
829 f2 = next(g2)
799
830
800 while True:
831 while True:
801 f1r, f2r = f1.linkrev(), f2.linkrev()
832 f1r, f2r = f1.linkrev(), f2.linkrev()
802 if f1r > f2r:
833 if f1r > f2r:
803 f1 = next(g1)
834 f1 = next(g1)
804 elif f2r > f1r:
835 elif f2r > f1r:
805 f2 = next(g2)
836 f2 = next(g2)
806 else: # f1 and f2 point to files in the same linkrev
837 else: # f1 and f2 point to files in the same linkrev
807 return f1 == f2 # true if they point to the same file
838 return f1 == f2 # true if they point to the same file
808 except StopIteration:
839 except StopIteration:
809 return False
840 return False
810
841
811
842
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.
    """
    # copy destinations that must NOT be reproduced in the dirstate
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # the expensive filtering pass runs for every copytrace setting
    # except an explicit boolean "off": 'heuristics', any true value, or
    # an unset/unparseable value (bctrace is None)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that still exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
836
867
837
868
def computechangesetfilesadded(ctx):
    """return the list of files added in a changeset
    """
    # a file is "added" when the changeset touches it and no parent of
    # the changeset contains it
    parents = ctx.parents()
    return [f for f in ctx.files() if not any(f in p for p in parents)]
846
877
847
878
def computechangesetfilesremoved(ctx):
    """return the list of files removed in a changeset
    """
    # a file is "removed" when the changeset touches it but it is no
    # longer present in the changeset itself
    return [f for f in ctx.files() if f not in ctx]
856
887
857
888
def computechangesetcopies(ctx):
    """Return the copies data for a changeset.

    The result is a pair of dictionaries ``(p1copies, p2copies)``, each
    of the form ``{newname: oldname}``, describing copies taken from the
    first and second parent respectively.
    """
    copiesfromp1 = {}
    copiesfromp2 = {}
    parent1 = ctx.p1()
    parent2 = ctx.p2()
    narrowmatch = ctx._repo.narrowmatch()
    for dst in ctx.files():
        # skip files outside the narrowspec and files no longer present
        if not narrowmatch(dst) or dst not in ctx:
            continue
        rename = ctx[dst].renamed()
        if not rename:
            continue
        src, srcnode = rename
        # attribute the copy to whichever parent holds the exact source
        # filenode recorded in the rename information
        if src in parent1 and parent1[src].filenode() == srcnode:
            copiesfromp1[dst] = src
        elif src in parent2 and parent2[src].filenode() == srcnode:
            copiesfromp2[dst] = src
    return copiesfromp1, copiesfromp2
882
913
883
914
def encodecopies(files, copies):
    """Encode ``copies`` ({dst: src}) against the ``files`` list.

    Each copy is stored on its own line as the index of the destination
    in ``files`` and the source name, separated by a NUL byte.  Raises
    ProgrammingError when some copy destination is absent from
    ``files``.
    """
    entries = [
        b'%d\0%s' % (index, copies[dst])
        for index, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
894
925
895
926
def decodecopies(files, data):
    """Decode a copies blob produced by ``encodecopies``.

    ``data`` contains one ``<index> NUL <source>`` entry per line, where
    the index points into ``files``.  Returns a ``{dst: src}`` dict, or
    None when the data cannot be decoded.
    """
    try:
        copies = {}
        if not data:
            return copies
        for l in data.split(b'\n'):
            strindex, src = l.split(b'\0')
            i = int(strindex)
            if i < 0 or i >= len(files):
                # reject out-of-range indices explicitly: a negative
                # index would otherwise silently alias files[-1] etc.
                # (mirrors the validation done in decodefileindices)
                return None
            copies[files[i]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
911
942
912
943
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated indices
    into ``files``, in ``files`` order."""
    members = set(subset)
    return b'\n'.join(
        b'%d' % index
        for index, f in enumerate(files)
        if f in members
    )
920
951
921
952
def decodefileindices(files, data):
    """Decode a newline-separated index blob back into file names.

    Returns the selected entries of ``files`` in encoded order, or None
    when ``data`` is malformed (e.g. another tool reused the same key
    with a different value syntax).
    """
    if not data:
        return []
    try:
        selected = []
        for chunk in data.split(b'\n'):
            index = int(chunk)
            if not (0 <= index < len(files)):
                return None
            selected.append(files[index])
        return selected
    except (ValueError, IndexError):
        return None
937
968
938
969
def _getsidedata(srcrepo, rev):
    """Build the copies-related sidedata mapping for ``rev``.

    Returns a dict from sidedata keys to encoded values, containing only
    the entries that are non-empty for this revision (possibly an empty
    dict).
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    if not (p1copies or p2copies or filesadded or filesremoved):
        # nothing to record for this revision
        return sidedata
    sortedfiles = sorted(ctx.files())
    encoded = (
        (sidedatamod.SD_P1COPIES, encodecopies(sortedfiles, p1copies)),
        (sidedatamod.SD_P2COPIES, encodecopies(sortedfiles, p2copies)),
        (sidedatamod.SD_FILESADDED, encodefileindices(sortedfiles, filesadded)),
        (
            sidedatamod.SD_FILESREMOVED,
            encodefileindices(sortedfiles, filesremoved),
        ),
    )
    # only keep the entries whose encoded form is non-empty
    for key, value in encoded:
        if value:
            sidedata[key] = value
    return sidedata
961
992
962
993
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion that injects copies sidedata.

    The returned callable computes and attaches the copies-related
    sidedata for changelog revisions and leaves every other revlog
    untouched.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog has a 'filteredrevs' attribute
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
971
1002
972
1003
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion that strips copies sidedata.

    For changelog revisions that carry sidedata, the returned callable
    requests removal of the four copies-related keys; everything else is
    left untouched.
    """

    def sidedatacompanion(revlog, rev):
        toremove = ()
        # 'filteredrevs' identifies the changelog
        ischangelog = util.safehasattr(revlog, 'filteredrevs')
        if ischangelog and revlog.flags(rev) & REVIDX_SIDEDATA:
            toremove = (
                sidedatamod.SD_P1COPIES,
                sidedatamod.SD_P2COPIES,
                sidedatamod.SD_FILESADDED,
                sidedatamod.SD_FILESREMOVED,
            )
        return False, toremove, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now