##// END OF EJS Templates
copies: move from a copy on branchpoint to a copy on write approach...
marmoute -
r43594:ffd04bc9 default
parent child Browse files
Show More
@@ -1,982 +1,986 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 match as matchmod,
20 match as matchmod,
21 node,
21 node,
22 pathutil,
22 pathutil,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 )
25 )
26
26
27 from .revlogutils import sidedata as sidedatamod
27 from .revlogutils import sidedata as sidedatamod
28
28
29 from .utils import stringutil
29 from .utils import stringutil
30
30
31
31
32 def _filter(src, dst, t):
32 def _filter(src, dst, t):
33 """filters out invalid copies after chaining"""
33 """filters out invalid copies after chaining"""
34
34
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
35 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
36 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
37 # in the following table (not including trivial cases). For example, case 2
37 # in the following table (not including trivial cases). For example, case 2
38 # is where a file existed in 'src' and remained under that name in 'mid' and
38 # is where a file existed in 'src' and remained under that name in 'mid' and
39 # then was renamed between 'mid' and 'dst'.
39 # then was renamed between 'mid' and 'dst'.
40 #
40 #
41 # case src mid dst result
41 # case src mid dst result
42 # 1 x y - -
42 # 1 x y - -
43 # 2 x y y x->y
43 # 2 x y y x->y
44 # 3 x y x -
44 # 3 x y x -
45 # 4 x y z x->z
45 # 4 x y z x->z
46 # 5 - x y -
46 # 5 - x y -
47 # 6 x x y x->y
47 # 6 x x y x->y
48 #
48 #
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
49 # _chain() takes care of chaining the copies in 'a' and 'b', but it
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
50 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
51 # between 5 and 6, so it includes all cases in its result.
51 # between 5 and 6, so it includes all cases in its result.
52 # Cases 1, 3, and 5 are then removed by _filter().
52 # Cases 1, 3, and 5 are then removed by _filter().
53
53
54 for k, v in list(t.items()):
54 for k, v in list(t.items()):
55 # remove copies from files that didn't exist
55 # remove copies from files that didn't exist
56 if v not in src:
56 if v not in src:
57 del t[k]
57 del t[k]
58 # remove criss-crossed copies
58 # remove criss-crossed copies
59 elif k in src and v in dst:
59 elif k in src and v in dst:
60 del t[k]
60 del t[k]
61 # remove copies to files that were then removed
61 # remove copies to files that were then removed
62 elif k not in dst:
62 elif k not in dst:
63 del t[k]
63 del t[k]
64
64
65
65
66 def _chain(a, b):
66 def _chain(a, b):
67 """chain two sets of copies 'a' and 'b'"""
67 """chain two sets of copies 'a' and 'b'"""
68 t = a.copy()
68 t = a.copy()
69 for k, v in pycompat.iteritems(b):
69 for k, v in pycompat.iteritems(b):
70 if v in t:
70 if v in t:
71 t[k] = t[v]
71 t[k] = t[v]
72 else:
72 else:
73 t[k] = v
73 t[k] = v
74 return t
74 return t
75
75
76
76
77 def _tracefile(fctx, am, basemf):
77 def _tracefile(fctx, am, basemf):
78 """return file context that is the ancestor of fctx present in ancestor
78 """return file context that is the ancestor of fctx present in ancestor
79 manifest am
79 manifest am
80
80
81 Note: we used to try and stop after a given limit, however checking if that
81 Note: we used to try and stop after a given limit, however checking if that
82 limit is reached turned out to be very expensive. we are better off
82 limit is reached turned out to be very expensive. we are better off
83 disabling that feature."""
83 disabling that feature."""
84
84
85 for f in fctx.ancestors():
85 for f in fctx.ancestors():
86 path = f.path()
86 path = f.path()
87 if am.get(path, None) == f.filenode():
87 if am.get(path, None) == f.filenode():
88 return path
88 return path
89 if basemf and basemf.get(path, None) == f.filenode():
89 if basemf and basemf.get(path, None) == f.filenode():
90 return path
90 return path
91
91
92
92
93 def _dirstatecopies(repo, match=None):
93 def _dirstatecopies(repo, match=None):
94 ds = repo.dirstate
94 ds = repo.dirstate
95 c = ds.copies().copy()
95 c = ds.copies().copy()
96 for k in list(c):
96 for k in list(c):
97 if ds[k] not in b'anm' or (match and not match(k)):
97 if ds[k] not in b'anm' or (match and not match(k)):
98 del c[k]
98 del c[k]
99 return c
99 return c
100
100
101
101
102 def _computeforwardmissing(a, b, match=None):
102 def _computeforwardmissing(a, b, match=None):
103 """Computes which files are in b but not a.
103 """Computes which files are in b but not a.
104 This is its own function so extensions can easily wrap this call to see what
104 This is its own function so extensions can easily wrap this call to see what
105 files _forwardcopies is about to process.
105 files _forwardcopies is about to process.
106 """
106 """
107 ma = a.manifest()
107 ma = a.manifest()
108 mb = b.manifest()
108 mb = b.manifest()
109 return mb.filesnotin(ma, match=match)
109 return mb.filesnotin(ma, match=match)
110
110
111
111
112 def usechangesetcentricalgo(repo):
112 def usechangesetcentricalgo(repo):
113 """Checks if we should use changeset-centric copy algorithms"""
113 """Checks if we should use changeset-centric copy algorithms"""
114 if repo.filecopiesmode == b'changeset-sidedata':
114 if repo.filecopiesmode == b'changeset-sidedata':
115 return True
115 return True
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
116 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
117 changesetsource = (b'changeset-only', b'compatibility')
117 changesetsource = (b'changeset-only', b'compatibility')
118 return readfrom in changesetsource
118 return readfrom in changesetsource
119
119
120
120
121 def _committedforwardcopies(a, b, base, match):
121 def _committedforwardcopies(a, b, base, match):
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
122 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
123 # files might have to be traced back to the fctx parent of the last
123 # files might have to be traced back to the fctx parent of the last
124 # one-side-only changeset, but not further back than that
124 # one-side-only changeset, but not further back than that
125 repo = a._repo
125 repo = a._repo
126
126
127 if usechangesetcentricalgo(repo):
127 if usechangesetcentricalgo(repo):
128 return _changesetforwardcopies(a, b, match)
128 return _changesetforwardcopies(a, b, match)
129
129
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
130 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
131 dbg = repo.ui.debug
131 dbg = repo.ui.debug
132 if debug:
132 if debug:
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
133 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
134 am = a.manifest()
134 am = a.manifest()
135 basemf = None if base is None else base.manifest()
135 basemf = None if base is None else base.manifest()
136
136
137 # find where new files came from
137 # find where new files came from
138 # we currently don't try to find where old files went, too expensive
138 # we currently don't try to find where old files went, too expensive
139 # this means we can miss a case like 'hg rm b; hg cp a b'
139 # this means we can miss a case like 'hg rm b; hg cp a b'
140 cm = {}
140 cm = {}
141
141
142 # Computing the forward missing is quite expensive on large manifests, since
142 # Computing the forward missing is quite expensive on large manifests, since
143 # it compares the entire manifests. We can optimize it in the common use
143 # it compares the entire manifests. We can optimize it in the common use
144 # case of computing what copies are in a commit versus its parent (like
144 # case of computing what copies are in a commit versus its parent (like
145 # during a rebase or histedit). Note, we exclude merge commits from this
145 # during a rebase or histedit). Note, we exclude merge commits from this
146 # optimization, since the ctx.files() for a merge commit is not correct for
146 # optimization, since the ctx.files() for a merge commit is not correct for
147 # this comparison.
147 # this comparison.
148 forwardmissingmatch = match
148 forwardmissingmatch = match
149 if b.p1() == a and b.p2().node() == node.nullid:
149 if b.p1() == a and b.p2().node() == node.nullid:
150 filesmatcher = matchmod.exact(b.files())
150 filesmatcher = matchmod.exact(b.files())
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
151 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
152 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
153
153
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
154 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
155
155
156 if debug:
156 if debug:
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
157 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
158
158
159 for f in sorted(missing):
159 for f in sorted(missing):
160 if debug:
160 if debug:
161 dbg(b'debug.copies: tracing file: %s\n' % f)
161 dbg(b'debug.copies: tracing file: %s\n' % f)
162 fctx = b[f]
162 fctx = b[f]
163 fctx._ancestrycontext = ancestrycontext
163 fctx._ancestrycontext = ancestrycontext
164
164
165 if debug:
165 if debug:
166 start = util.timer()
166 start = util.timer()
167 opath = _tracefile(fctx, am, basemf)
167 opath = _tracefile(fctx, am, basemf)
168 if opath:
168 if opath:
169 if debug:
169 if debug:
170 dbg(b'debug.copies: rename of: %s\n' % opath)
170 dbg(b'debug.copies: rename of: %s\n' % opath)
171 cm[f] = opath
171 cm[f] = opath
172 if debug:
172 if debug:
173 dbg(
173 dbg(
174 b'debug.copies: time: %f seconds\n'
174 b'debug.copies: time: %f seconds\n'
175 % (util.timer() - start)
175 % (util.timer() - start)
176 )
176 )
177 return cm
177 return cm
178
178
179
179
180 def _revinfogetter(repo):
180 def _revinfogetter(repo):
181 """return a function that return multiple data given a <rev>"i
181 """return a function that return multiple data given a <rev>"i
182
182
183 * p1: revision number of first parent
183 * p1: revision number of first parent
184 * p2: revision number of first parent
184 * p2: revision number of first parent
185 * p1copies: mapping of copies from p1
185 * p1copies: mapping of copies from p1
186 * p2copies: mapping of copies from p2
186 * p2copies: mapping of copies from p2
187 * removed: a list of removed files
187 * removed: a list of removed files
188 """
188 """
189 cl = repo.changelog
189 cl = repo.changelog
190 parents = cl.parentrevs
190 parents = cl.parentrevs
191
191
192 if repo.filecopiesmode == b'changeset-sidedata':
192 if repo.filecopiesmode == b'changeset-sidedata':
193 changelogrevision = cl.changelogrevision
193 changelogrevision = cl.changelogrevision
194 flags = cl.flags
194 flags = cl.flags
195
195
196 def revinfo(rev):
196 def revinfo(rev):
197 p1, p2 = parents(rev)
197 p1, p2 = parents(rev)
198 if flags(rev) & REVIDX_SIDEDATA:
198 if flags(rev) & REVIDX_SIDEDATA:
199 c = changelogrevision(rev)
199 c = changelogrevision(rev)
200 p1copies = c.p1copies
200 p1copies = c.p1copies
201 p2copies = c.p2copies
201 p2copies = c.p2copies
202 removed = c.filesremoved
202 removed = c.filesremoved
203 else:
203 else:
204 p1copies = {}
204 p1copies = {}
205 p2copies = {}
205 p2copies = {}
206 removed = ()
206 removed = ()
207 return p1, p2, p1copies, p2copies, removed
207 return p1, p2, p1copies, p2copies, removed
208
208
209 else:
209 else:
210
210
211 def revinfo(rev):
211 def revinfo(rev):
212 p1, p2 = parents(rev)
212 p1, p2 = parents(rev)
213 ctx = repo[rev]
213 ctx = repo[rev]
214 p1copies, p2copies = ctx._copies
214 p1copies, p2copies = ctx._copies
215 removed = ctx.filesremoved()
215 removed = ctx.filesremoved()
216 return p1, p2, p1copies, p2copies, removed
216 return p1, p2, p1copies, p2copies, removed
217
217
218 return revinfo
218 return revinfo
219
219
220
220
221 def _changesetforwardcopies(a, b, match):
221 def _changesetforwardcopies(a, b, match):
222 if a.rev() in (node.nullrev, b.rev()):
222 if a.rev() in (node.nullrev, b.rev()):
223 return {}
223 return {}
224
224
225 repo = a.repo().unfiltered()
225 repo = a.repo().unfiltered()
226 children = {}
226 children = {}
227 revinfo = _revinfogetter(repo)
227 revinfo = _revinfogetter(repo)
228
228
229 cl = repo.changelog
229 cl = repo.changelog
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
230 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
231 mrset = set(missingrevs)
231 mrset = set(missingrevs)
232 roots = set()
232 roots = set()
233 for r in missingrevs:
233 for r in missingrevs:
234 for p in cl.parentrevs(r):
234 for p in cl.parentrevs(r):
235 if p == node.nullrev:
235 if p == node.nullrev:
236 continue
236 continue
237 if p not in children:
237 if p not in children:
238 children[p] = [r]
238 children[p] = [r]
239 else:
239 else:
240 children[p].append(r)
240 children[p].append(r)
241 if p not in mrset:
241 if p not in mrset:
242 roots.add(p)
242 roots.add(p)
243 if not roots:
243 if not roots:
244 # no common revision to track copies from
244 # no common revision to track copies from
245 return {}
245 return {}
246 min_root = min(roots)
246 min_root = min(roots)
247
247
248 from_head = set(
248 from_head = set(
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
249 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
250 )
250 )
251
251
252 iterrevs = set(from_head)
252 iterrevs = set(from_head)
253 iterrevs &= mrset
253 iterrevs &= mrset
254 iterrevs.update(roots)
254 iterrevs.update(roots)
255 iterrevs.remove(b.rev())
255 iterrevs.remove(b.rev())
256 all_copies = {r: {} for r in roots}
256 all_copies = {r: {} for r in roots}
257 alwaysmatch = match.always()
257 alwaysmatch = match.always()
258 for r in sorted(iterrevs):
258 for r in sorted(iterrevs):
259 copies = all_copies.pop(r)
259 copies = all_copies.pop(r)
260 for i, c in enumerate(children[r]):
260 for i, c in enumerate(children[r]):
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
261 p1, p2, p1copies, p2copies, removed = revinfo(c)
262 if r == p1:
262 if r == p1:
263 parent = 1
263 parent = 1
264 childcopies = p1copies
264 childcopies = p1copies
265 else:
265 else:
266 assert r == p2
266 assert r == p2
267 parent = 2
267 parent = 2
268 childcopies = p2copies
268 childcopies = p2copies
269 if not alwaysmatch:
269 if not alwaysmatch:
270 childcopies = {
270 childcopies = {
271 dst: src for dst, src in childcopies.items() if match(dst)
271 dst: src for dst, src in childcopies.items() if match(dst)
272 }
272 }
273 # Copy the dict only if later iterations will also need it
274 if i != len(children[r]) - 1:
275 newcopies = copies.copy()
276 else:
277 newcopies = copies
273 newcopies = copies
278 if childcopies:
274 if childcopies:
279 newcopies = _chain(newcopies, childcopies)
275 newcopies = _chain(newcopies, childcopies)
276 # _chain makes a copies, we can avoid doing so in some
277 # simple/linear cases.
278 assert newcopies is not copies
280 for f in removed:
279 for f in removed:
281 if f in newcopies:
280 if f in newcopies:
281 if newcopies is copies:
282 # copy on write to avoid affecting potential other
283 # branches. when there are no other branches, this
284 # could be avoided.
285 newcopies = copies.copy()
282 del newcopies[f]
286 del newcopies[f]
283 othercopies = all_copies.get(c)
287 othercopies = all_copies.get(c)
284 if othercopies is None:
288 if othercopies is None:
285 all_copies[c] = newcopies
289 all_copies[c] = newcopies
286 else:
290 else:
287 # we are the second parent to work on c, we need to merge our
291 # we are the second parent to work on c, we need to merge our
288 # work with the other.
292 # work with the other.
289 #
293 #
290 # Unlike when copies are stored in the filelog, we consider
294 # Unlike when copies are stored in the filelog, we consider
291 # it a copy even if the destination already existed on the
295 # it a copy even if the destination already existed on the
292 # other branch. It's simply too expensive to check if the
296 # other branch. It's simply too expensive to check if the
293 # file existed in the manifest.
297 # file existed in the manifest.
294 #
298 #
295 # In case of conflict, parent 1 take precedence over parent 2.
299 # In case of conflict, parent 1 take precedence over parent 2.
296 # This is an arbitrary choice made anew when implementing
300 # This is an arbitrary choice made anew when implementing
297 # changeset based copies. It was made without regards with
301 # changeset based copies. It was made without regards with
298 # potential filelog related behavior.
302 # potential filelog related behavior.
299 if parent == 1:
303 if parent == 1:
300 othercopies.update(newcopies)
304 othercopies.update(newcopies)
301 else:
305 else:
302 newcopies.update(othercopies)
306 newcopies.update(othercopies)
303 all_copies[c] = newcopies
307 all_copies[c] = newcopies
304 return all_copies[b.rev()]
308 return all_copies[b.rev()]
305
309
306
310
307 def _forwardcopies(a, b, base=None, match=None):
311 def _forwardcopies(a, b, base=None, match=None):
308 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
312 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
309
313
310 if base is None:
314 if base is None:
311 base = a
315 base = a
312 match = a.repo().narrowmatch(match)
316 match = a.repo().narrowmatch(match)
313 # check for working copy
317 # check for working copy
314 if b.rev() is None:
318 if b.rev() is None:
315 cm = _committedforwardcopies(a, b.p1(), base, match)
319 cm = _committedforwardcopies(a, b.p1(), base, match)
316 # combine copies from dirstate if necessary
320 # combine copies from dirstate if necessary
317 copies = _chain(cm, _dirstatecopies(b._repo, match))
321 copies = _chain(cm, _dirstatecopies(b._repo, match))
318 else:
322 else:
319 copies = _committedforwardcopies(a, b, base, match)
323 copies = _committedforwardcopies(a, b, base, match)
320 return copies
324 return copies
321
325
322
326
323 def _backwardrenames(a, b, match):
327 def _backwardrenames(a, b, match):
324 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
328 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
325 return {}
329 return {}
326
330
327 # Even though we're not taking copies into account, 1:n rename situations
331 # Even though we're not taking copies into account, 1:n rename situations
328 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
332 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
329 # arbitrarily pick one of the renames.
333 # arbitrarily pick one of the renames.
330 # We don't want to pass in "match" here, since that would filter
334 # We don't want to pass in "match" here, since that would filter
331 # the destination by it. Since we're reversing the copies, we want
335 # the destination by it. Since we're reversing the copies, we want
332 # to filter the source instead.
336 # to filter the source instead.
333 f = _forwardcopies(b, a)
337 f = _forwardcopies(b, a)
334 r = {}
338 r = {}
335 for k, v in sorted(pycompat.iteritems(f)):
339 for k, v in sorted(pycompat.iteritems(f)):
336 if match and not match(v):
340 if match and not match(v):
337 continue
341 continue
338 # remove copies
342 # remove copies
339 if v in a:
343 if v in a:
340 continue
344 continue
341 r[v] = k
345 r[v] = k
342 return r
346 return r
343
347
344
348
345 def pathcopies(x, y, match=None):
349 def pathcopies(x, y, match=None):
346 """find {dst@y: src@x} copy mapping for directed compare"""
350 """find {dst@y: src@x} copy mapping for directed compare"""
347 repo = x._repo
351 repo = x._repo
348 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
352 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
349 if debug:
353 if debug:
350 repo.ui.debug(
354 repo.ui.debug(
351 b'debug.copies: searching copies from %s to %s\n' % (x, y)
355 b'debug.copies: searching copies from %s to %s\n' % (x, y)
352 )
356 )
353 if x == y or not x or not y:
357 if x == y or not x or not y:
354 return {}
358 return {}
355 a = y.ancestor(x)
359 a = y.ancestor(x)
356 if a == x:
360 if a == x:
357 if debug:
361 if debug:
358 repo.ui.debug(b'debug.copies: search mode: forward\n')
362 repo.ui.debug(b'debug.copies: search mode: forward\n')
359 if y.rev() is None and x == y.p1():
363 if y.rev() is None and x == y.p1():
360 # short-circuit to avoid issues with merge states
364 # short-circuit to avoid issues with merge states
361 return _dirstatecopies(repo, match)
365 return _dirstatecopies(repo, match)
362 copies = _forwardcopies(x, y, match=match)
366 copies = _forwardcopies(x, y, match=match)
363 elif a == y:
367 elif a == y:
364 if debug:
368 if debug:
365 repo.ui.debug(b'debug.copies: search mode: backward\n')
369 repo.ui.debug(b'debug.copies: search mode: backward\n')
366 copies = _backwardrenames(x, y, match=match)
370 copies = _backwardrenames(x, y, match=match)
367 else:
371 else:
368 if debug:
372 if debug:
369 repo.ui.debug(b'debug.copies: search mode: combined\n')
373 repo.ui.debug(b'debug.copies: search mode: combined\n')
370 base = None
374 base = None
371 if a.rev() != node.nullrev:
375 if a.rev() != node.nullrev:
372 base = x
376 base = x
373 copies = _chain(
377 copies = _chain(
374 _backwardrenames(x, a, match=match),
378 _backwardrenames(x, a, match=match),
375 _forwardcopies(a, y, base, match=match),
379 _forwardcopies(a, y, base, match=match),
376 )
380 )
377 _filter(x, y, copies)
381 _filter(x, y, copies)
378 return copies
382 return copies
379
383
380
384
381 def mergecopies(repo, c1, c2, base):
385 def mergecopies(repo, c1, c2, base):
382 """
386 """
383 Finds moves and copies between context c1 and c2 that are relevant for
387 Finds moves and copies between context c1 and c2 that are relevant for
384 merging. 'base' will be used as the merge base.
388 merging. 'base' will be used as the merge base.
385
389
386 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
390 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
387 files that were moved/ copied in one merge parent and modified in another.
391 files that were moved/ copied in one merge parent and modified in another.
388 For example:
392 For example:
389
393
390 o ---> 4 another commit
394 o ---> 4 another commit
391 |
395 |
392 | o ---> 3 commit that modifies a.txt
396 | o ---> 3 commit that modifies a.txt
393 | /
397 | /
394 o / ---> 2 commit that moves a.txt to b.txt
398 o / ---> 2 commit that moves a.txt to b.txt
395 |/
399 |/
396 o ---> 1 merge base
400 o ---> 1 merge base
397
401
398 If we try to rebase revision 3 on revision 4, since there is no a.txt in
402 If we try to rebase revision 3 on revision 4, since there is no a.txt in
399 revision 4, and if user have copytrace disabled, we prints the following
403 revision 4, and if user have copytrace disabled, we prints the following
400 message:
404 message:
401
405
402 ```other changed <file> which local deleted```
406 ```other changed <file> which local deleted```
403
407
404 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
408 Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
405 "dirmove".
409 "dirmove".
406
410
407 "copy" is a mapping from destination name -> source name,
411 "copy" is a mapping from destination name -> source name,
408 where source is in c1 and destination is in c2 or vice-versa.
412 where source is in c1 and destination is in c2 or vice-versa.
409
413
410 "movewithdir" is a mapping from source name -> destination name,
414 "movewithdir" is a mapping from source name -> destination name,
411 where the file at source present in one context but not the other
415 where the file at source present in one context but not the other
412 needs to be moved to destination by the merge process, because the
416 needs to be moved to destination by the merge process, because the
413 other context moved the directory it is in.
417 other context moved the directory it is in.
414
418
415 "diverge" is a mapping of source name -> list of destination names
419 "diverge" is a mapping of source name -> list of destination names
416 for divergent renames.
420 for divergent renames.
417
421
418 "renamedelete" is a mapping of source name -> list of destination
422 "renamedelete" is a mapping of source name -> list of destination
419 names for files deleted in c1 that were renamed in c2 or vice-versa.
423 names for files deleted in c1 that were renamed in c2 or vice-versa.
420
424
421 "dirmove" is a mapping of detected source dir -> destination dir renames.
425 "dirmove" is a mapping of detected source dir -> destination dir renames.
422 This is needed for handling changes to new files previously grafted into
426 This is needed for handling changes to new files previously grafted into
423 renamed directories.
427 renamed directories.
424
428
425 This function calls different copytracing algorithms based on config.
429 This function calls different copytracing algorithms based on config.
426 """
430 """
427 # avoid silly behavior for update from empty dir
431 # avoid silly behavior for update from empty dir
428 if not c1 or not c2 or c1 == c2:
432 if not c1 or not c2 or c1 == c2:
429 return {}, {}, {}, {}, {}
433 return {}, {}, {}, {}, {}
430
434
431 narrowmatch = c1.repo().narrowmatch()
435 narrowmatch = c1.repo().narrowmatch()
432
436
433 # avoid silly behavior for parent -> working dir
437 # avoid silly behavior for parent -> working dir
434 if c2.node() is None and c1.node() == repo.dirstate.p1():
438 if c2.node() is None and c1.node() == repo.dirstate.p1():
435 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
439 return _dirstatecopies(repo, narrowmatch), {}, {}, {}, {}
436
440
437 copytracing = repo.ui.config(b'experimental', b'copytrace')
441 copytracing = repo.ui.config(b'experimental', b'copytrace')
438 if stringutil.parsebool(copytracing) is False:
442 if stringutil.parsebool(copytracing) is False:
439 # stringutil.parsebool() returns None when it is unable to parse the
443 # stringutil.parsebool() returns None when it is unable to parse the
440 # value, so we should rely on making sure copytracing is on such cases
444 # value, so we should rely on making sure copytracing is on such cases
441 return {}, {}, {}, {}, {}
445 return {}, {}, {}, {}, {}
442
446
443 if usechangesetcentricalgo(repo):
447 if usechangesetcentricalgo(repo):
444 # The heuristics don't make sense when we need changeset-centric algos
448 # The heuristics don't make sense when we need changeset-centric algos
445 return _fullcopytracing(repo, c1, c2, base)
449 return _fullcopytracing(repo, c1, c2, base)
446
450
447 # Copy trace disabling is explicitly below the node == p1 logic above
451 # Copy trace disabling is explicitly below the node == p1 logic above
448 # because the logic above is required for a simple copy to be kept across a
452 # because the logic above is required for a simple copy to be kept across a
449 # rebase.
453 # rebase.
450 if copytracing == b'heuristics':
454 if copytracing == b'heuristics':
451 # Do full copytracing if only non-public revisions are involved as
455 # Do full copytracing if only non-public revisions are involved as
452 # that will be fast enough and will also cover the copies which could
456 # that will be fast enough and will also cover the copies which could
453 # be missed by heuristics
457 # be missed by heuristics
454 if _isfullcopytraceable(repo, c1, base):
458 if _isfullcopytraceable(repo, c1, base):
455 return _fullcopytracing(repo, c1, c2, base)
459 return _fullcopytracing(repo, c1, c2, base)
456 return _heuristicscopytracing(repo, c1, c2, base)
460 return _heuristicscopytracing(repo, c1, c2, base)
457 else:
461 else:
458 return _fullcopytracing(repo, c1, c2, base)
462 return _fullcopytracing(repo, c1, c2, base)
459
463
460
464
461 def _isfullcopytraceable(repo, c1, base):
465 def _isfullcopytraceable(repo, c1, base):
462 """ Checks that if base, source and destination are all no-public branches,
466 """ Checks that if base, source and destination are all no-public branches,
463 if yes let's use the full copytrace algorithm for increased capabilities
467 if yes let's use the full copytrace algorithm for increased capabilities
464 since it will be fast enough.
468 since it will be fast enough.
465
469
466 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
470 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
467 number of changesets from c1 to base such that if number of changesets are
471 number of changesets from c1 to base such that if number of changesets are
468 more than the limit, full copytracing algorithm won't be used.
472 more than the limit, full copytracing algorithm won't be used.
469 """
473 """
470 if c1.rev() is None:
474 if c1.rev() is None:
471 c1 = c1.p1()
475 c1 = c1.p1()
472 if c1.mutable() and base.mutable():
476 if c1.mutable() and base.mutable():
473 sourcecommitlimit = repo.ui.configint(
477 sourcecommitlimit = repo.ui.configint(
474 b'experimental', b'copytrace.sourcecommitlimit'
478 b'experimental', b'copytrace.sourcecommitlimit'
475 )
479 )
476 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
480 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
477 return commits < sourcecommitlimit
481 return commits < sourcecommitlimit
478 return False
482 return False
479
483
480
484
481 def _checksinglesidecopies(
485 def _checksinglesidecopies(
482 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
486 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
483 ):
487 ):
484 if src not in m2:
488 if src not in m2:
485 # deleted on side 2
489 # deleted on side 2
486 if src not in m1:
490 if src not in m1:
487 # renamed on side 1, deleted on side 2
491 # renamed on side 1, deleted on side 2
488 renamedelete[src] = dsts1
492 renamedelete[src] = dsts1
489 elif m2[src] != mb[src]:
493 elif m2[src] != mb[src]:
490 if not _related(c2[src], base[src]):
494 if not _related(c2[src], base[src]):
491 return
495 return
492 # modified on side 2
496 # modified on side 2
493 for dst in dsts1:
497 for dst in dsts1:
494 if dst not in m2:
498 if dst not in m2:
495 # dst not added on side 2 (handle as regular
499 # dst not added on side 2 (handle as regular
496 # "both created" case in manifestmerge otherwise)
500 # "both created" case in manifestmerge otherwise)
497 copy[dst] = src
501 copy[dst] = src
498
502
499
503
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a 5-tuple ``(copy, movewithdir, diverge, renamedelete, dirmove)``
    of dicts (see the final ``return`` below); callers that bail out early
    get empty dicts for the unfilled slots.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # forward copies from the merge base to each side, as {dst: src}
    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    # invert to {src: [dst, ...]} so both sides can be compared per source
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy = {}
    diverge = {}
    renamedelete = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy, renamedelete
            )

    # flattened destination sets, only used for the debug output below
    renamedeleteset = set()
    divergeset = set()
    for dsts in diverge.values():
        divergeset.update(dsts)
    for dsts in renamedelete.values():
        renamedeleteset.update(dsts)

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    fullcopy = copies1.copy()
    fullcopy.update(copies2)
    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    if repo.ui.debugflag:
        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for f in sorted(fullcopy):
            note = b""
            if f in copy:
                note += b"*"
            if f in divergeset:
                note += b"!"
            if f in renamedeleteset:
                note += b"%"
            repo.ui.debug(
                b" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f, note)
            )
    del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    # normalize to trailing-slash form so startswith() below matches whole
    # path components only
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1 + u2:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
664
668
665
669
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.

    Returns the same 5-tuple shape as _fullcopytracing(); only the first
    (copies) slot is ever populated here.
    """

    # working contexts have rev() None; fall back to their first parent so
    # the revset queries below get integer revisions
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    copies = {}

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect every file touched between base (exclusive) and c2
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # keep only the recorded copies whose source also exists on the c1 side
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    filt = lambda f: f not in m1 and f in base and f in c2
    missingfiles = [f for f in changedfiles if filt(f)]

    if missingfiles:
        # index files added on the c1 side by basename and by dirname, the
        # two axes the heuristics match on
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)
            # we can have a lot of candidates which can slow down the heuristics
            # config value to limit the number of candidates moves to check
            maxcandidates = repo.ui.configint(
                b'experimental', b'copytrace.movecandidateslimit'
            )

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies[candidate] = f

    return copies, {}, {}, {}, {}
773
777
774
778
775 def _related(f1, f2):
779 def _related(f1, f2):
776 """return True if f1 and f2 filectx have a common ancestor
780 """return True if f1 and f2 filectx have a common ancestor
777
781
778 Walk back to common ancestor to see if the two files originate
782 Walk back to common ancestor to see if the two files originate
779 from the same file. Since workingfilectx's rev() is None it messes
783 from the same file. Since workingfilectx's rev() is None it messes
780 up the integer comparison logic, hence the pre-step check for
784 up the integer comparison logic, hence the pre-step check for
781 None (f1 and f2 can only be workingfilectx's initially).
785 None (f1 and f2 can only be workingfilectx's initially).
782 """
786 """
783
787
784 if f1 == f2:
788 if f1 == f2:
785 return True # a match
789 return True # a match
786
790
787 g1, g2 = f1.ancestors(), f2.ancestors()
791 g1, g2 = f1.ancestors(), f2.ancestors()
788 try:
792 try:
789 f1r, f2r = f1.linkrev(), f2.linkrev()
793 f1r, f2r = f1.linkrev(), f2.linkrev()
790
794
791 if f1r is None:
795 if f1r is None:
792 f1 = next(g1)
796 f1 = next(g1)
793 if f2r is None:
797 if f2r is None:
794 f2 = next(g2)
798 f2 = next(g2)
795
799
796 while True:
800 while True:
797 f1r, f2r = f1.linkrev(), f2.linkrev()
801 f1r, f2r = f1.linkrev(), f2.linkrev()
798 if f1r > f2r:
802 if f1r > f2r:
799 f1 = next(g1)
803 f1 = next(g1)
800 elif f2r > f1r:
804 elif f2r > f1r:
801 f2 = next(g2)
805 f2 = next(g2)
802 else: # f1 and f2 point to files in the same linkrev
806 else: # f1 and f2 point to files in the same linkrev
803 return f1 == f2 # true if they point to the same file
807 return f1 == f2 # true if they point to the same file
804 except StopIteration:
808 except StopIteration:
805 return False
809 return False
806
810
807
811
def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
    """reproduce copies from fromrev to rev in the dirstate

    If skiprev is specified, it's a revision that should be used to
    filter copy records. Any copies that occur between fromrev and
    skiprev will not be duplicated, even if they appear in the set of
    copies between fromrev and rev.

    Mutates the working context: matching files get markcopied() called;
    nothing is returned.
    """
    exclude = {}
    ctraceconfig = repo.ui.config(b'experimental', b'copytrace')
    bctrace = stringutil.parsebool(ctraceconfig)
    # compute the exclusion set only when copytracing is enabled
    # ('heuristics', 'on', or unset which defaults to on)
    if skiprev is not None and (
        ctraceconfig == b'heuristics' or bctrace or bctrace is None
    ):
        # copytrace='off' skips this line, but not the entire function because
        # the line below is O(size of the repo) during a rebase, while the rest
        # of the function is much faster (and is required for carrying copy
        # metadata across the rebase anyway).
        exclude = pathcopies(repo[fromrev], repo[skiprev])
    for dst, src in pycompat.iteritems(pathcopies(repo[fromrev], repo[rev])):
        if dst in exclude:
            continue
        # only mark files that still exist in the working context
        if dst in wctx:
            wctx[dst].markcopied(src)
832
836
833
837
834 def computechangesetfilesadded(ctx):
838 def computechangesetfilesadded(ctx):
835 """return the list of files added in a changeset
839 """return the list of files added in a changeset
836 """
840 """
837 added = []
841 added = []
838 for f in ctx.files():
842 for f in ctx.files():
839 if not any(f in p for p in ctx.parents()):
843 if not any(f in p for p in ctx.parents()):
840 added.append(f)
844 added.append(f)
841 return added
845 return added
842
846
843
847
844 def computechangesetfilesremoved(ctx):
848 def computechangesetfilesremoved(ctx):
845 """return the list of files removed in a changeset
849 """return the list of files removed in a changeset
846 """
850 """
847 removed = []
851 removed = []
848 for f in ctx.files():
852 for f in ctx.files():
849 if f not in ctx:
853 if f not in ctx:
850 removed.append(f)
854 removed.append(f)
851 return removed
855 return removed
852
856
853
857
854 def computechangesetcopies(ctx):
858 def computechangesetcopies(ctx):
855 """return the copies data for a changeset
859 """return the copies data for a changeset
856
860
857 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
861 The copies data are returned as a pair of dictionnary (p1copies, p2copies).
858
862
859 Each dictionnary are in the form: `{newname: oldname}`
863 Each dictionnary are in the form: `{newname: oldname}`
860 """
864 """
861 p1copies = {}
865 p1copies = {}
862 p2copies = {}
866 p2copies = {}
863 p1 = ctx.p1()
867 p1 = ctx.p1()
864 p2 = ctx.p2()
868 p2 = ctx.p2()
865 narrowmatch = ctx._repo.narrowmatch()
869 narrowmatch = ctx._repo.narrowmatch()
866 for dst in ctx.files():
870 for dst in ctx.files():
867 if not narrowmatch(dst) or dst not in ctx:
871 if not narrowmatch(dst) or dst not in ctx:
868 continue
872 continue
869 copied = ctx[dst].renamed()
873 copied = ctx[dst].renamed()
870 if not copied:
874 if not copied:
871 continue
875 continue
872 src, srcnode = copied
876 src, srcnode = copied
873 if src in p1 and p1[src].filenode() == srcnode:
877 if src in p1 and p1[src].filenode() == srcnode:
874 p1copies[dst] = src
878 p1copies[dst] = src
875 elif src in p2 and p2[src].filenode() == srcnode:
879 elif src in p2 and p2[src].filenode() == srcnode:
876 p2copies[dst] = src
880 p2copies[dst] = src
877 return p1copies, p2copies
881 return p1copies, p2copies
878
882
879
883
def encodecopies(files, copies):
    """Serialize ``copies`` ({dst: src}) against the ``files`` list.

    Each copy becomes ``<index-of-dst>\\0<src>`` and entries are joined by
    newlines.  Raises a ProgrammingError when some copy destination is not
    present in ``files`` (every destination must be addressable by index).
    """
    entries = [
        b'%d\0%s' % (idx, copies[dst])
        for idx, dst in enumerate(files)
        if dst in copies
    ]
    if len(entries) != len(copies):
        # at least one copy destination was absent from `files`
        raise error.ProgrammingError(
            b'some copy targets missing from file list'
        )
    return b"\n".join(entries)
890
894
891
895
def decodecopies(files, data):
    """Inverse of encodecopies: parse ``data`` back into a {dst: src} dict.

    Returns None when the payload does not follow the expected
    ``<index>\\0<src>`` per-line syntax or references an out-of-range
    index.
    """
    try:
        copies = {}
        if not data:
            return copies
        for line in data.split(b'\n'):
            strindex, src = line.split(b'\0')
            copies[files[int(strindex)]] = src
        return copies
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "p1copies") and
        # used different syntax for the value.
        return None
907
911
908
912
def encodefileindices(files, subset):
    """Encode the members of ``subset`` as newline-separated decimal
    indices into ``files``.

    Output order follows ``files``, not ``subset``.
    """
    wanted = set(subset)
    return b'\n'.join(
        b'%d' % idx for idx, f in enumerate(files) if f in wanted
    )
916
920
917
921
def decodefileindices(files, data):
    """Inverse of encodefileindices: map newline-separated indices in
    ``data`` back to entries of ``files``.

    Returns None for syntactically invalid or out-of-range payloads.
    """
    try:
        if not data:
            return []
        subset = []
        for token in data.split(b'\n'):
            index = int(token)
            # explicit bounds check: negative indices must not wrap around
            if not 0 <= index < len(files):
                return None
            subset.append(files[index])
        return subset
    except (ValueError, IndexError):
        # Perhaps someone had chosen the same key name (e.g. "added") and
        # used different syntax for the value.
        return None
933
937
934
938
def _getsidedata(srcrepo, rev):
    """Compute the copy-related sidedata dict for revision ``rev`` of
    ``srcrepo``.

    Returns a dict mapping sidedata keys (SD_P1COPIES, SD_P2COPIES,
    SD_FILESADDED, SD_FILESREMOVED) to their encoded values; categories
    with nothing to record are omitted, so the dict is empty when the
    changeset carries no copy metadata.
    """
    ctx = srcrepo[rev]
    p1copies, p2copies = computechangesetcopies(ctx)
    filesadded = computechangesetfilesadded(ctx)
    filesremoved = computechangesetfilesremoved(ctx)
    sidedata = {}
    # BUGFIX: the previous guard tested the (p1copies, p2copies) tuple
    # itself, which is always truthy, so the encoding work below was never
    # skipped; test the actual collections instead (the resulting sidedata
    # is unchanged either way).
    if any([p1copies, p2copies, filesadded, filesremoved]):
        sortedfiles = sorted(ctx.files())
        p1copies = encodecopies(sortedfiles, p1copies)
        p2copies = encodecopies(sortedfiles, p2copies)
        filesadded = encodefileindices(sortedfiles, filesadded)
        filesremoved = encodefileindices(sortedfiles, filesremoved)
        if p1copies:
            sidedata[sidedatamod.SD_P1COPIES] = p1copies
        if p2copies:
            sidedata[sidedatamod.SD_P2COPIES] = p2copies
        if filesadded:
            sidedata[sidedatamod.SD_FILESADDED] = filesadded
        if filesremoved:
            sidedata[sidedatamod.SD_FILESREMOVED] = filesremoved
    return sidedata
957
961
958
962
def getsidedataadder(srcrepo, destrepo):
    """Return a sidedata companion callable that recomputes copy-related
    sidedata from ``srcrepo`` for every changelog revision.

    The companion returns ``(False, (), sidedata)``: it never asks for a
    full drop nor for key removal, it only supplies new sidedata.
    """

    def sidedatacompanion(revlog, rev):
        # only the changelog gets sidedata; the presence of `filteredrevs`
        # is used as a cheap way to recognize it among revlogs
        if util.safehasattr(revlog, 'filteredrevs'):
            return False, (), _getsidedata(srcrepo, rev)
        return False, (), {}

    return sidedatacompanion
967
971
968
972
def getsidedataremover(srcrepo, destrepo):
    """Return a sidedata companion callable that strips the copy-related
    sidedata categories from every changelog revision carrying them.

    The companion returns ``(False, removedkeys, {})``: it never drops a
    revision wholesale and never adds new sidedata.
    """

    def sidedatacompanion(revlog, rev):
        removed = ()
        # only the changelog carries this sidedata; `filteredrevs` is used
        # as a cheap way to recognize it among revlogs
        if util.safehasattr(revlog, 'filteredrevs'):
            if revlog.flags(rev) & REVIDX_SIDEDATA:
                removed = (
                    sidedatamod.SD_P1COPIES,
                    sidedatamod.SD_P2COPIES,
                    sidedatamod.SD_FILESADDED,
                    sidedatamod.SD_FILESREMOVED,
                )
        return False, removed, {}

    return sidedatacompanion
General Comments 0
You need to be logged in to leave comments. Login now