##// END OF EJS Templates
copies: make two version of the changeset centric algorithm...
marmoute -
r46214:ad6ebb6f default
parent child Browse files
Show More
@@ -1,997 +1,1095 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 match as matchmod,
19 match as matchmod,
20 node,
20 node,
21 pathutil,
21 pathutil,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25
25
26
26
27 from .utils import stringutil
27 from .utils import stringutil
28
28
29
29
30 def _filter(src, dst, t):
30 def _filter(src, dst, t):
31 """filters out invalid copies after chaining"""
31 """filters out invalid copies after chaining"""
32
32
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 # in the following table (not including trivial cases). For example, case 2
35 # in the following table (not including trivial cases). For example, case 2
36 # is where a file existed in 'src' and remained under that name in 'mid' and
36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 # then was renamed between 'mid' and 'dst'.
37 # then was renamed between 'mid' and 'dst'.
38 #
38 #
39 # case src mid dst result
39 # case src mid dst result
40 # 1 x y - -
40 # 1 x y - -
41 # 2 x y y x->y
41 # 2 x y y x->y
42 # 3 x y x -
42 # 3 x y x -
43 # 4 x y z x->z
43 # 4 x y z x->z
44 # 5 - x y -
44 # 5 - x y -
45 # 6 x x y x->y
45 # 6 x x y x->y
46 #
46 #
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 # between 5 and 6, so it includes all cases in its result.
49 # between 5 and 6, so it includes all cases in its result.
50 # Cases 1, 3, and 5 are then removed by _filter().
50 # Cases 1, 3, and 5 are then removed by _filter().
51
51
52 for k, v in list(t.items()):
52 for k, v in list(t.items()):
53 # remove copies from files that didn't exist
53 # remove copies from files that didn't exist
54 if v not in src:
54 if v not in src:
55 del t[k]
55 del t[k]
56 # remove criss-crossed copies
56 # remove criss-crossed copies
57 elif k in src and v in dst:
57 elif k in src and v in dst:
58 del t[k]
58 del t[k]
59 # remove copies to files that were then removed
59 # remove copies to files that were then removed
60 elif k not in dst:
60 elif k not in dst:
61 del t[k]
61 del t[k]
62
62
63
63
64 def _chain(prefix, suffix):
64 def _chain(prefix, suffix):
65 """chain two sets of copies 'prefix' and 'suffix'"""
65 """chain two sets of copies 'prefix' and 'suffix'"""
66 result = prefix.copy()
66 result = prefix.copy()
67 for key, value in pycompat.iteritems(suffix):
67 for key, value in pycompat.iteritems(suffix):
68 result[key] = prefix.get(value, value)
68 result[key] = prefix.get(value, value)
69 return result
69 return result
70
70
71
71
72 def _tracefile(fctx, am, basemf):
72 def _tracefile(fctx, am, basemf):
73 """return file context that is the ancestor of fctx present in ancestor
73 """return file context that is the ancestor of fctx present in ancestor
74 manifest am
74 manifest am
75
75
76 Note: we used to try and stop after a given limit, however checking if that
76 Note: we used to try and stop after a given limit, however checking if that
77 limit is reached turned out to be very expensive. we are better off
77 limit is reached turned out to be very expensive. we are better off
78 disabling that feature."""
78 disabling that feature."""
79
79
80 for f in fctx.ancestors():
80 for f in fctx.ancestors():
81 path = f.path()
81 path = f.path()
82 if am.get(path, None) == f.filenode():
82 if am.get(path, None) == f.filenode():
83 return path
83 return path
84 if basemf and basemf.get(path, None) == f.filenode():
84 if basemf and basemf.get(path, None) == f.filenode():
85 return path
85 return path
86
86
87
87
88 def _dirstatecopies(repo, match=None):
88 def _dirstatecopies(repo, match=None):
89 ds = repo.dirstate
89 ds = repo.dirstate
90 c = ds.copies().copy()
90 c = ds.copies().copy()
91 for k in list(c):
91 for k in list(c):
92 if ds[k] not in b'anm' or (match and not match(k)):
92 if ds[k] not in b'anm' or (match and not match(k)):
93 del c[k]
93 del c[k]
94 return c
94 return c
95
95
96
96
97 def _computeforwardmissing(a, b, match=None):
97 def _computeforwardmissing(a, b, match=None):
98 """Computes which files are in b but not a.
98 """Computes which files are in b but not a.
99 This is its own function so extensions can easily wrap this call to see what
99 This is its own function so extensions can easily wrap this call to see what
100 files _forwardcopies is about to process.
100 files _forwardcopies is about to process.
101 """
101 """
102 ma = a.manifest()
102 ma = a.manifest()
103 mb = b.manifest()
103 mb = b.manifest()
104 return mb.filesnotin(ma, match=match)
104 return mb.filesnotin(ma, match=match)
105
105
106
106
107 def usechangesetcentricalgo(repo):
107 def usechangesetcentricalgo(repo):
108 """Checks if we should use changeset-centric copy algorithms"""
108 """Checks if we should use changeset-centric copy algorithms"""
109 if repo.filecopiesmode == b'changeset-sidedata':
109 if repo.filecopiesmode == b'changeset-sidedata':
110 return True
110 return True
111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
111 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
112 changesetsource = (b'changeset-only', b'compatibility')
112 changesetsource = (b'changeset-only', b'compatibility')
113 return readfrom in changesetsource
113 return readfrom in changesetsource
114
114
115
115
116 def _committedforwardcopies(a, b, base, match):
116 def _committedforwardcopies(a, b, base, match):
117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
117 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
118 # files might have to be traced back to the fctx parent of the last
118 # files might have to be traced back to the fctx parent of the last
119 # one-side-only changeset, but not further back than that
119 # one-side-only changeset, but not further back than that
120 repo = a._repo
120 repo = a._repo
121
121
122 if usechangesetcentricalgo(repo):
122 if usechangesetcentricalgo(repo):
123 return _changesetforwardcopies(a, b, match)
123 return _changesetforwardcopies(a, b, match)
124
124
125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
125 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
126 dbg = repo.ui.debug
126 dbg = repo.ui.debug
127 if debug:
127 if debug:
128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
128 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
129 am = a.manifest()
129 am = a.manifest()
130 basemf = None if base is None else base.manifest()
130 basemf = None if base is None else base.manifest()
131
131
132 # find where new files came from
132 # find where new files came from
133 # we currently don't try to find where old files went, too expensive
133 # we currently don't try to find where old files went, too expensive
134 # this means we can miss a case like 'hg rm b; hg cp a b'
134 # this means we can miss a case like 'hg rm b; hg cp a b'
135 cm = {}
135 cm = {}
136
136
137 # Computing the forward missing is quite expensive on large manifests, since
137 # Computing the forward missing is quite expensive on large manifests, since
138 # it compares the entire manifests. We can optimize it in the common use
138 # it compares the entire manifests. We can optimize it in the common use
139 # case of computing what copies are in a commit versus its parent (like
139 # case of computing what copies are in a commit versus its parent (like
140 # during a rebase or histedit). Note, we exclude merge commits from this
140 # during a rebase or histedit). Note, we exclude merge commits from this
141 # optimization, since the ctx.files() for a merge commit is not correct for
141 # optimization, since the ctx.files() for a merge commit is not correct for
142 # this comparison.
142 # this comparison.
143 forwardmissingmatch = match
143 forwardmissingmatch = match
144 if b.p1() == a and b.p2().node() == node.nullid:
144 if b.p1() == a and b.p2().node() == node.nullid:
145 filesmatcher = matchmod.exact(b.files())
145 filesmatcher = matchmod.exact(b.files())
146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
146 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
147 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
148
148
149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
149 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
150
150
151 if debug:
151 if debug:
152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
152 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
153
153
154 for f in sorted(missing):
154 for f in sorted(missing):
155 if debug:
155 if debug:
156 dbg(b'debug.copies: tracing file: %s\n' % f)
156 dbg(b'debug.copies: tracing file: %s\n' % f)
157 fctx = b[f]
157 fctx = b[f]
158 fctx._ancestrycontext = ancestrycontext
158 fctx._ancestrycontext = ancestrycontext
159
159
160 if debug:
160 if debug:
161 start = util.timer()
161 start = util.timer()
162 opath = _tracefile(fctx, am, basemf)
162 opath = _tracefile(fctx, am, basemf)
163 if opath:
163 if opath:
164 if debug:
164 if debug:
165 dbg(b'debug.copies: rename of: %s\n' % opath)
165 dbg(b'debug.copies: rename of: %s\n' % opath)
166 cm[f] = opath
166 cm[f] = opath
167 if debug:
167 if debug:
168 dbg(
168 dbg(
169 b'debug.copies: time: %f seconds\n'
169 b'debug.copies: time: %f seconds\n'
170 % (util.timer() - start)
170 % (util.timer() - start)
171 )
171 )
172 return cm
172 return cm
173
173
174
174
175 def _revinfo_getter(repo):
175 def _revinfo_getter(repo):
176 """return a function that return multiple data given a <rev>"i
176 """return a function that return multiple data given a <rev>"i
177
177
178 * p1: revision number of first parent
178 * p1: revision number of first parent
179 * p2: revision number of first parent
179 * p2: revision number of first parent
180 * p1copies: mapping of copies from p1
180 * p1copies: mapping of copies from p1
181 * p2copies: mapping of copies from p2
181 * p2copies: mapping of copies from p2
182 * removed: a list of removed files
182 * removed: a list of removed files
183 * ismerged: a callback to know if file was merged in that revision
183 * ismerged: a callback to know if file was merged in that revision
184 """
184 """
185 cl = repo.changelog
185 cl = repo.changelog
186 parents = cl.parentrevs
186 parents = cl.parentrevs
187
187
188 def get_ismerged(rev):
188 def get_ismerged(rev):
189 ctx = repo[rev]
189 ctx = repo[rev]
190
190
191 def ismerged(path):
191 def ismerged(path):
192 if path not in ctx.files():
192 if path not in ctx.files():
193 return False
193 return False
194 fctx = ctx[path]
194 fctx = ctx[path]
195 parents = fctx._filelog.parents(fctx._filenode)
195 parents = fctx._filelog.parents(fctx._filenode)
196 nb_parents = 0
196 nb_parents = 0
197 for n in parents:
197 for n in parents:
198 if n != node.nullid:
198 if n != node.nullid:
199 nb_parents += 1
199 nb_parents += 1
200 return nb_parents >= 2
200 return nb_parents >= 2
201
201
202 return ismerged
202 return ismerged
203
203
204 if repo.filecopiesmode == b'changeset-sidedata':
204 if repo.filecopiesmode == b'changeset-sidedata':
205 changelogrevision = cl.changelogrevision
205 changelogrevision = cl.changelogrevision
206 flags = cl.flags
206 flags = cl.flags
207
207
208 # A small cache to avoid doing the work twice for merges
208 # A small cache to avoid doing the work twice for merges
209 #
209 #
210 # In the vast majority of cases, if we ask information for a revision
210 # In the vast majority of cases, if we ask information for a revision
211 # about 1 parent, we'll later ask it for the other. So it make sense to
211 # about 1 parent, we'll later ask it for the other. So it make sense to
212 # keep the information around when reaching the first parent of a merge
212 # keep the information around when reaching the first parent of a merge
213 # and dropping it after it was provided for the second parents.
213 # and dropping it after it was provided for the second parents.
214 #
214 #
215 # It exists cases were only one parent of the merge will be walked. It
215 # It exists cases were only one parent of the merge will be walked. It
216 # happens when the "destination" the copy tracing is descendant from a
216 # happens when the "destination" the copy tracing is descendant from a
217 # new root, not common with the "source". In that case, we will only walk
217 # new root, not common with the "source". In that case, we will only walk
218 # through merge parents that are descendant of changesets common
218 # through merge parents that are descendant of changesets common
219 # between "source" and "destination".
219 # between "source" and "destination".
220 #
220 #
221 # With the current case implementation if such changesets have a copy
221 # With the current case implementation if such changesets have a copy
222 # information, we'll keep them in memory until the end of
222 # information, we'll keep them in memory until the end of
223 # _changesetforwardcopies. We don't expect the case to be frequent
223 # _changesetforwardcopies. We don't expect the case to be frequent
224 # enough to matters.
224 # enough to matters.
225 #
225 #
226 # In addition, it would be possible to reach pathological case, were
226 # In addition, it would be possible to reach pathological case, were
227 # many first parent are met before any second parent is reached. In
227 # many first parent are met before any second parent is reached. In
228 # that case the cache could grow. If this even become an issue one can
228 # that case the cache could grow. If this even become an issue one can
229 # safely introduce a maximum cache size. This would trade extra CPU/IO
229 # safely introduce a maximum cache size. This would trade extra CPU/IO
230 # time to save memory.
230 # time to save memory.
231 merge_caches = {}
231 merge_caches = {}
232
232
233 def revinfo(rev):
233 def revinfo(rev):
234 p1, p2 = parents(rev)
234 p1, p2 = parents(rev)
235 value = None
235 value = None
236 if flags(rev) & REVIDX_SIDEDATA:
236 if flags(rev) & REVIDX_SIDEDATA:
237 e = merge_caches.pop(rev, None)
237 e = merge_caches.pop(rev, None)
238 if e is not None:
238 if e is not None:
239 return e
239 return e
240 c = changelogrevision(rev)
240 c = changelogrevision(rev)
241 p1copies = c.p1copies
241 p1copies = c.p1copies
242 p2copies = c.p2copies
242 p2copies = c.p2copies
243 removed = c.filesremoved
243 removed = c.filesremoved
244 if p1 != node.nullrev and p2 != node.nullrev:
244 if p1 != node.nullrev and p2 != node.nullrev:
245 # XXX some case we over cache, IGNORE
245 # XXX some case we over cache, IGNORE
246 value = merge_caches[rev] = (
246 value = merge_caches[rev] = (
247 p1,
247 p1,
248 p2,
248 p2,
249 p1copies,
249 p1copies,
250 p2copies,
250 p2copies,
251 removed,
251 removed,
252 get_ismerged(rev),
252 get_ismerged(rev),
253 )
253 )
254 else:
254 else:
255 p1copies = {}
255 p1copies = {}
256 p2copies = {}
256 p2copies = {}
257 removed = []
257 removed = []
258
258
259 if value is None:
259 if value is None:
260 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
260 value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
261 return value
261 return value
262
262
263 else:
263 else:
264
264
265 def revinfo(rev):
265 def revinfo(rev):
266 p1, p2 = parents(rev)
266 p1, p2 = parents(rev)
267 ctx = repo[rev]
267 ctx = repo[rev]
268 p1copies, p2copies = ctx._copies
268 p1copies, p2copies = ctx._copies
269 removed = ctx.filesremoved()
269 removed = ctx.filesremoved()
270 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
270 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
271
271
272 return revinfo
272 return revinfo
273
273
274
274
275 def _changesetforwardcopies(a, b, match):
275 def _changesetforwardcopies(a, b, match):
276 if a.rev() in (node.nullrev, b.rev()):
276 if a.rev() in (node.nullrev, b.rev()):
277 return {}
277 return {}
278
278
279 repo = a.repo().unfiltered()
279 repo = a.repo().unfiltered()
280 children = {}
280 children = {}
281 revinfo = _revinfo_getter(repo)
281 revinfo = _revinfo_getter(repo)
282
282
283 cl = repo.changelog
283 cl = repo.changelog
284 isancestor = cl.isancestorrev # XXX we should had chaching to this.
284 isancestor = cl.isancestorrev # XXX we should had chaching to this.
285 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
285 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
286 mrset = set(missingrevs)
286 mrset = set(missingrevs)
287 roots = set()
287 roots = set()
288 for r in missingrevs:
288 for r in missingrevs:
289 for p in cl.parentrevs(r):
289 for p in cl.parentrevs(r):
290 if p == node.nullrev:
290 if p == node.nullrev:
291 continue
291 continue
292 if p not in children:
292 if p not in children:
293 children[p] = [r]
293 children[p] = [r]
294 else:
294 else:
295 children[p].append(r)
295 children[p].append(r)
296 if p not in mrset:
296 if p not in mrset:
297 roots.add(p)
297 roots.add(p)
298 if not roots:
298 if not roots:
299 # no common revision to track copies from
299 # no common revision to track copies from
300 return {}
300 return {}
301 min_root = min(roots)
301 min_root = min(roots)
302
302
303 from_head = set(
303 from_head = set(
304 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
304 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
305 )
305 )
306
306
307 iterrevs = set(from_head)
307 iterrevs = set(from_head)
308 iterrevs &= mrset
308 iterrevs &= mrset
309 iterrevs.update(roots)
309 iterrevs.update(roots)
310 iterrevs.remove(b.rev())
310 iterrevs.remove(b.rev())
311 revs = sorted(iterrevs)
311 revs = sorted(iterrevs)
312 return _combine_changeset_copies(
312
313 revs, children, b.rev(), revinfo, match, isancestor
313 if repo.filecopiesmode == b'changeset-sidedata':
314 )
314 return _combine_changeset_copies(
315 revs, children, b.rev(), revinfo, match, isancestor
316 )
317 else:
318 return _combine_changeset_copies_extra(
319 revs, children, b.rev(), revinfo, match, isancestor
320 )
315
321
316
322
317 def _combine_changeset_copies(
323 def _combine_changeset_copies(
318 revs, children, targetrev, revinfo, match, isancestor
324 revs, children, targetrev, revinfo, match, isancestor
319 ):
325 ):
320 """combine the copies information for each item of iterrevs
326 """combine the copies information for each item of iterrevs
321
327
322 revs: sorted iterable of revision to visit
328 revs: sorted iterable of revision to visit
323 children: a {parent: [children]} mapping.
329 children: a {parent: [children]} mapping.
324 targetrev: the final copies destination revision (not in iterrevs)
330 targetrev: the final copies destination revision (not in iterrevs)
325 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
331 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
326 match: a matcher
332 match: a matcher
327
333
328 It returns the aggregated copies information for `targetrev`.
334 It returns the aggregated copies information for `targetrev`.
329 """
335 """
330 all_copies = {}
336 all_copies = {}
331 alwaysmatch = match.always()
337 alwaysmatch = match.always()
332 for r in revs:
338 for r in revs:
333 copies = all_copies.pop(r, None)
339 copies = all_copies.pop(r, None)
334 if copies is None:
340 if copies is None:
335 # this is a root
341 # this is a root
336 copies = {}
342 copies = {}
337 for i, c in enumerate(children[r]):
343 for i, c in enumerate(children[r]):
338 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
344 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
339 if r == p1:
345 if r == p1:
340 parent = 1
346 parent = 1
341 childcopies = p1copies
347 childcopies = p1copies
342 else:
348 else:
343 assert r == p2
349 assert r == p2
344 parent = 2
350 parent = 2
345 childcopies = p2copies
351 childcopies = p2copies
346 if not alwaysmatch:
352 if not alwaysmatch:
347 childcopies = {
353 childcopies = {
348 dst: src for dst, src in childcopies.items() if match(dst)
354 dst: src for dst, src in childcopies.items() if match(dst)
349 }
355 }
350 newcopies = copies
356 newcopies = copies
351 if childcopies:
357 if childcopies:
352 newcopies = copies.copy()
358 newcopies = copies.copy()
353 for dest, source in pycompat.iteritems(childcopies):
359 for dest, source in pycompat.iteritems(childcopies):
354 prev = copies.get(source)
360 prev = copies.get(source)
355 if prev is not None and prev[1] is not None:
361 if prev is not None and prev[1] is not None:
356 source = prev[1]
362 source = prev[1]
357 newcopies[dest] = (c, source)
363 newcopies[dest] = (c, source)
358 assert newcopies is not copies
364 assert newcopies is not copies
359 for f in removed:
365 for f in removed:
360 if f in newcopies:
366 if f in newcopies:
361 if newcopies is copies:
367 if newcopies is copies:
362 # copy on write to avoid affecting potential other
368 # copy on write to avoid affecting potential other
363 # branches. when there are no other branches, this
369 # branches. when there are no other branches, this
364 # could be avoided.
370 # could be avoided.
365 newcopies = copies.copy()
371 newcopies = copies.copy()
366 newcopies[f] = (c, None)
372 newcopies[f] = (c, None)
367 othercopies = all_copies.get(c)
373 othercopies = all_copies.get(c)
368 if othercopies is None:
374 if othercopies is None:
369 all_copies[c] = newcopies
375 all_copies[c] = newcopies
370 else:
376 else:
371 # we are the second parent to work on c, we need to merge our
377 # we are the second parent to work on c, we need to merge our
372 # work with the other.
378 # work with the other.
373 #
379 #
374 # In case of conflict, parent 1 take precedence over parent 2.
380 # In case of conflict, parent 1 take precedence over parent 2.
375 # This is an arbitrary choice made anew when implementing
381 # This is an arbitrary choice made anew when implementing
376 # changeset based copies. It was made without regards with
382 # changeset based copies. It was made without regards with
377 # potential filelog related behavior.
383 # potential filelog related behavior.
378 if parent == 1:
384 if parent == 1:
379 _merge_copies_dict(
385 _merge_copies_dict(
380 othercopies, newcopies, isancestor, ismerged
386 othercopies, newcopies, isancestor, ismerged
381 )
387 )
382 else:
388 else:
383 _merge_copies_dict(
389 _merge_copies_dict(
384 newcopies, othercopies, isancestor, ismerged
390 newcopies, othercopies, isancestor, ismerged
385 )
391 )
386 all_copies[c] = newcopies
392 all_copies[c] = newcopies
387
393
388 final_copies = {}
394 final_copies = {}
389 for dest, (tt, source) in all_copies[targetrev].items():
395 for dest, (tt, source) in all_copies[targetrev].items():
390 if source is not None:
396 if source is not None:
391 final_copies[dest] = source
397 final_copies[dest] = source
392 return final_copies
398 return final_copies
393
399
394
400
395 def _merge_copies_dict(minor, major, isancestor, ismerged):
401 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 """merge two copies-mapping together, minor and major
402 """merge two copies-mapping together, minor and major
397
403
398 In case of conflict, value from "major" will be picked.
404 In case of conflict, value from "major" will be picked.
399
405
400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
406 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 ancestors of `high_rev`,
407 ancestors of `high_rev`,
402
408
403 - `ismerged(path)`: callable return True if `path` have been merged in the
409 - `ismerged(path)`: callable return True if `path` have been merged in the
404 current revision,
410 current revision,
405 """
411 """
406 for dest, value in major.items():
412 for dest, value in major.items():
407 other = minor.get(dest)
413 other = minor.get(dest)
408 if other is None:
414 if other is None:
409 minor[dest] = value
415 minor[dest] = value
410 else:
416 else:
411 new_tt = value[0]
417 new_tt = value[0]
412 other_tt = other[0]
418 other_tt = other[0]
413 if value[1] == other[1]:
419 if value[1] == other[1]:
414 continue
420 continue
415 # content from "major" wins, unless it is older
421 # content from "major" wins, unless it is older
416 # than the branch point or there is a merge
422 # than the branch point or there is a merge
417 if (
423 if (
418 new_tt == other_tt
424 new_tt == other_tt
419 or not isancestor(new_tt, other_tt)
425 or not isancestor(new_tt, other_tt)
420 or ismerged(dest)
426 or ismerged(dest)
421 ):
427 ):
422 minor[dest] = value
428 minor[dest] = value
423
429
424
430
431 def _combine_changeset_copies_extra(
432 revs, children, targetrev, revinfo, match, isancestor
433 ):
434 """version of `_combine_changeset_copies` that works with the Google
435 specific "extra" based storage for copy information"""
436 all_copies = {}
437 alwaysmatch = match.always()
438 for r in revs:
439 copies = all_copies.pop(r, None)
440 if copies is None:
441 # this is a root
442 copies = {}
443 for i, c in enumerate(children[r]):
444 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
445 if r == p1:
446 parent = 1
447 childcopies = p1copies
448 else:
449 assert r == p2
450 parent = 2
451 childcopies = p2copies
452 if not alwaysmatch:
453 childcopies = {
454 dst: src for dst, src in childcopies.items() if match(dst)
455 }
456 newcopies = copies
457 if childcopies:
458 newcopies = copies.copy()
459 for dest, source in pycompat.iteritems(childcopies):
460 prev = copies.get(source)
461 if prev is not None and prev[1] is not None:
462 source = prev[1]
463 newcopies[dest] = (c, source)
464 assert newcopies is not copies
465 for f in removed:
466 if f in newcopies:
467 if newcopies is copies:
468 # copy on write to avoid affecting potential other
469 # branches. when there are no other branches, this
470 # could be avoided.
471 newcopies = copies.copy()
472 newcopies[f] = (c, None)
473 othercopies = all_copies.get(c)
474 if othercopies is None:
475 all_copies[c] = newcopies
476 else:
477 # we are the second parent to work on c, we need to merge our
478 # work with the other.
479 #
480 # In case of conflict, parent 1 take precedence over parent 2.
481 # This is an arbitrary choice made anew when implementing
482 # changeset based copies. It was made without regards with
483 # potential filelog related behavior.
484 if parent == 1:
485 _merge_copies_dict_extra(
486 othercopies, newcopies, isancestor, ismerged
487 )
488 else:
489 _merge_copies_dict_extra(
490 newcopies, othercopies, isancestor, ismerged
491 )
492 all_copies[c] = newcopies
493
494 final_copies = {}
495 for dest, (tt, source) in all_copies[targetrev].items():
496 if source is not None:
497 final_copies[dest] = source
498 return final_copies
499
500
501 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
502 """version of `_merge_copies_dict` that works with the Google
503 specific "extra" based storage for copy information"""
504 for dest, value in major.items():
505 other = minor.get(dest)
506 if other is None:
507 minor[dest] = value
508 else:
509 new_tt = value[0]
510 other_tt = other[0]
511 if value[1] == other[1]:
512 continue
513 # content from "major" wins, unless it is older
514 # than the branch point or there is a merge
515 if (
516 new_tt == other_tt
517 or not isancestor(new_tt, other_tt)
518 or ismerged(dest)
519 ):
520 minor[dest] = value
521
522
425 def _forwardcopies(a, b, base=None, match=None):
523 def _forwardcopies(a, b, base=None, match=None):
426 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
524 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
427
525
428 if base is None:
526 if base is None:
429 base = a
527 base = a
430 match = a.repo().narrowmatch(match)
528 match = a.repo().narrowmatch(match)
431 # check for working copy
529 # check for working copy
432 if b.rev() is None:
530 if b.rev() is None:
433 cm = _committedforwardcopies(a, b.p1(), base, match)
531 cm = _committedforwardcopies(a, b.p1(), base, match)
434 # combine copies from dirstate if necessary
532 # combine copies from dirstate if necessary
435 copies = _chain(cm, _dirstatecopies(b._repo, match))
533 copies = _chain(cm, _dirstatecopies(b._repo, match))
436 else:
534 else:
437 copies = _committedforwardcopies(a, b, base, match)
535 copies = _committedforwardcopies(a, b, base, match)
438 return copies
536 return copies
439
537
440
538
441 def _backwardrenames(a, b, match):
539 def _backwardrenames(a, b, match):
442 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
540 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
443 return {}
541 return {}
444
542
445 # Even though we're not taking copies into account, 1:n rename situations
543 # Even though we're not taking copies into account, 1:n rename situations
446 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
544 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
447 # arbitrarily pick one of the renames.
545 # arbitrarily pick one of the renames.
448 # We don't want to pass in "match" here, since that would filter
546 # We don't want to pass in "match" here, since that would filter
449 # the destination by it. Since we're reversing the copies, we want
547 # the destination by it. Since we're reversing the copies, we want
450 # to filter the source instead.
548 # to filter the source instead.
451 f = _forwardcopies(b, a)
549 f = _forwardcopies(b, a)
452 r = {}
550 r = {}
453 for k, v in sorted(pycompat.iteritems(f)):
551 for k, v in sorted(pycompat.iteritems(f)):
454 if match and not match(v):
552 if match and not match(v):
455 continue
553 continue
456 # remove copies
554 # remove copies
457 if v in a:
555 if v in a:
458 continue
556 continue
459 r[v] = k
557 r[v] = k
460 return r
558 return r
461
559
462
560
463 def pathcopies(x, y, match=None):
561 def pathcopies(x, y, match=None):
464 """find {dst@y: src@x} copy mapping for directed compare"""
562 """find {dst@y: src@x} copy mapping for directed compare"""
465 repo = x._repo
563 repo = x._repo
466 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
564 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
467 if debug:
565 if debug:
468 repo.ui.debug(
566 repo.ui.debug(
469 b'debug.copies: searching copies from %s to %s\n' % (x, y)
567 b'debug.copies: searching copies from %s to %s\n' % (x, y)
470 )
568 )
471 if x == y or not x or not y:
569 if x == y or not x or not y:
472 return {}
570 return {}
473 if y.rev() is None and x == y.p1():
571 if y.rev() is None and x == y.p1():
474 if debug:
572 if debug:
475 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
573 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
476 # short-circuit to avoid issues with merge states
574 # short-circuit to avoid issues with merge states
477 return _dirstatecopies(repo, match)
575 return _dirstatecopies(repo, match)
478 a = y.ancestor(x)
576 a = y.ancestor(x)
479 if a == x:
577 if a == x:
480 if debug:
578 if debug:
481 repo.ui.debug(b'debug.copies: search mode: forward\n')
579 repo.ui.debug(b'debug.copies: search mode: forward\n')
482 copies = _forwardcopies(x, y, match=match)
580 copies = _forwardcopies(x, y, match=match)
483 elif a == y:
581 elif a == y:
484 if debug:
582 if debug:
485 repo.ui.debug(b'debug.copies: search mode: backward\n')
583 repo.ui.debug(b'debug.copies: search mode: backward\n')
486 copies = _backwardrenames(x, y, match=match)
584 copies = _backwardrenames(x, y, match=match)
487 else:
585 else:
488 if debug:
586 if debug:
489 repo.ui.debug(b'debug.copies: search mode: combined\n')
587 repo.ui.debug(b'debug.copies: search mode: combined\n')
490 base = None
588 base = None
491 if a.rev() != node.nullrev:
589 if a.rev() != node.nullrev:
492 base = x
590 base = x
493 copies = _chain(
591 copies = _chain(
494 _backwardrenames(x, a, match=match),
592 _backwardrenames(x, a, match=match),
495 _forwardcopies(a, y, base, match=match),
593 _forwardcopies(a, y, base, match=match),
496 )
594 )
497 _filter(x, y, copies)
595 _filter(x, y, copies)
498 return copies
596 return copies
499
597
500
598
501 def mergecopies(repo, c1, c2, base):
599 def mergecopies(repo, c1, c2, base):
502 """
600 """
503 Finds moves and copies between context c1 and c2 that are relevant for
601 Finds moves and copies between context c1 and c2 that are relevant for
504 merging. 'base' will be used as the merge base.
602 merging. 'base' will be used as the merge base.
505
603
506 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
604 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
507 files that were moved/ copied in one merge parent and modified in another.
605 files that were moved/ copied in one merge parent and modified in another.
508 For example:
606 For example:
509
607
510 o ---> 4 another commit
608 o ---> 4 another commit
511 |
609 |
512 | o ---> 3 commit that modifies a.txt
610 | o ---> 3 commit that modifies a.txt
513 | /
611 | /
514 o / ---> 2 commit that moves a.txt to b.txt
612 o / ---> 2 commit that moves a.txt to b.txt
515 |/
613 |/
516 o ---> 1 merge base
614 o ---> 1 merge base
517
615
518 If we try to rebase revision 3 on revision 4, since there is no a.txt in
616 If we try to rebase revision 3 on revision 4, since there is no a.txt in
519 revision 4, and if user have copytrace disabled, we prints the following
617 revision 4, and if user have copytrace disabled, we prints the following
520 message:
618 message:
521
619
522 ```other changed <file> which local deleted```
620 ```other changed <file> which local deleted```
523
621
524 Returns a tuple where:
622 Returns a tuple where:
525
623
526 "branch_copies" an instance of branch_copies.
624 "branch_copies" an instance of branch_copies.
527
625
528 "diverge" is a mapping of source name -> list of destination names
626 "diverge" is a mapping of source name -> list of destination names
529 for divergent renames.
627 for divergent renames.
530
628
531 This function calls different copytracing algorithms based on config.
629 This function calls different copytracing algorithms based on config.
532 """
630 """
533 # avoid silly behavior for update from empty dir
631 # avoid silly behavior for update from empty dir
534 if not c1 or not c2 or c1 == c2:
632 if not c1 or not c2 or c1 == c2:
535 return branch_copies(), branch_copies(), {}
633 return branch_copies(), branch_copies(), {}
536
634
537 narrowmatch = c1.repo().narrowmatch()
635 narrowmatch = c1.repo().narrowmatch()
538
636
539 # avoid silly behavior for parent -> working dir
637 # avoid silly behavior for parent -> working dir
540 if c2.node() is None and c1.node() == repo.dirstate.p1():
638 if c2.node() is None and c1.node() == repo.dirstate.p1():
541 return (
639 return (
542 branch_copies(_dirstatecopies(repo, narrowmatch)),
640 branch_copies(_dirstatecopies(repo, narrowmatch)),
543 branch_copies(),
641 branch_copies(),
544 {},
642 {},
545 )
643 )
546
644
547 copytracing = repo.ui.config(b'experimental', b'copytrace')
645 copytracing = repo.ui.config(b'experimental', b'copytrace')
548 if stringutil.parsebool(copytracing) is False:
646 if stringutil.parsebool(copytracing) is False:
549 # stringutil.parsebool() returns None when it is unable to parse the
647 # stringutil.parsebool() returns None when it is unable to parse the
550 # value, so we should rely on making sure copytracing is on such cases
648 # value, so we should rely on making sure copytracing is on such cases
551 return branch_copies(), branch_copies(), {}
649 return branch_copies(), branch_copies(), {}
552
650
553 if usechangesetcentricalgo(repo):
651 if usechangesetcentricalgo(repo):
554 # The heuristics don't make sense when we need changeset-centric algos
652 # The heuristics don't make sense when we need changeset-centric algos
555 return _fullcopytracing(repo, c1, c2, base)
653 return _fullcopytracing(repo, c1, c2, base)
556
654
557 # Copy trace disabling is explicitly below the node == p1 logic above
655 # Copy trace disabling is explicitly below the node == p1 logic above
558 # because the logic above is required for a simple copy to be kept across a
656 # because the logic above is required for a simple copy to be kept across a
559 # rebase.
657 # rebase.
560 if copytracing == b'heuristics':
658 if copytracing == b'heuristics':
561 # Do full copytracing if only non-public revisions are involved as
659 # Do full copytracing if only non-public revisions are involved as
562 # that will be fast enough and will also cover the copies which could
660 # that will be fast enough and will also cover the copies which could
563 # be missed by heuristics
661 # be missed by heuristics
564 if _isfullcopytraceable(repo, c1, base):
662 if _isfullcopytraceable(repo, c1, base):
565 return _fullcopytracing(repo, c1, c2, base)
663 return _fullcopytracing(repo, c1, c2, base)
566 return _heuristicscopytracing(repo, c1, c2, base)
664 return _heuristicscopytracing(repo, c1, c2, base)
567 else:
665 else:
568 return _fullcopytracing(repo, c1, c2, base)
666 return _fullcopytracing(repo, c1, c2, base)
569
667
570
668
571 def _isfullcopytraceable(repo, c1, base):
669 def _isfullcopytraceable(repo, c1, base):
572 """ Checks that if base, source and destination are all no-public branches,
670 """ Checks that if base, source and destination are all no-public branches,
573 if yes let's use the full copytrace algorithm for increased capabilities
671 if yes let's use the full copytrace algorithm for increased capabilities
574 since it will be fast enough.
672 since it will be fast enough.
575
673
576 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
674 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
577 number of changesets from c1 to base such that if number of changesets are
675 number of changesets from c1 to base such that if number of changesets are
578 more than the limit, full copytracing algorithm won't be used.
676 more than the limit, full copytracing algorithm won't be used.
579 """
677 """
580 if c1.rev() is None:
678 if c1.rev() is None:
581 c1 = c1.p1()
679 c1 = c1.p1()
582 if c1.mutable() and base.mutable():
680 if c1.mutable() and base.mutable():
583 sourcecommitlimit = repo.ui.configint(
681 sourcecommitlimit = repo.ui.configint(
584 b'experimental', b'copytrace.sourcecommitlimit'
682 b'experimental', b'copytrace.sourcecommitlimit'
585 )
683 )
586 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
684 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
587 return commits < sourcecommitlimit
685 return commits < sourcecommitlimit
588 return False
686 return False
589
687
590
688
591 def _checksinglesidecopies(
689 def _checksinglesidecopies(
592 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
690 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
593 ):
691 ):
594 if src not in m2:
692 if src not in m2:
595 # deleted on side 2
693 # deleted on side 2
596 if src not in m1:
694 if src not in m1:
597 # renamed on side 1, deleted on side 2
695 # renamed on side 1, deleted on side 2
598 renamedelete[src] = dsts1
696 renamedelete[src] = dsts1
599 elif src not in mb:
697 elif src not in mb:
600 # Work around the "short-circuit to avoid issues with merge states"
698 # Work around the "short-circuit to avoid issues with merge states"
601 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
699 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
602 # destination doesn't exist in y.
700 # destination doesn't exist in y.
603 pass
701 pass
604 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
702 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
605 return
703 return
606 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
704 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
607 # modified on side 2
705 # modified on side 2
608 for dst in dsts1:
706 for dst in dsts1:
609 copy[dst] = src
707 copy[dst] = src
610
708
611
709
612 class branch_copies(object):
710 class branch_copies(object):
613 """Information about copies made on one side of a merge/graft.
711 """Information about copies made on one side of a merge/graft.
614
712
615 "copy" is a mapping from destination name -> source name,
713 "copy" is a mapping from destination name -> source name,
616 where source is in c1 and destination is in c2 or vice-versa.
714 where source is in c1 and destination is in c2 or vice-versa.
617
715
618 "movewithdir" is a mapping from source name -> destination name,
716 "movewithdir" is a mapping from source name -> destination name,
619 where the file at source present in one context but not the other
717 where the file at source present in one context but not the other
620 needs to be moved to destination by the merge process, because the
718 needs to be moved to destination by the merge process, because the
621 other context moved the directory it is in.
719 other context moved the directory it is in.
622
720
623 "renamedelete" is a mapping of source name -> list of destination
721 "renamedelete" is a mapping of source name -> list of destination
624 names for files deleted in c1 that were renamed in c2 or vice-versa.
722 names for files deleted in c1 that were renamed in c2 or vice-versa.
625
723
626 "dirmove" is a mapping of detected source dir -> destination dir renames.
724 "dirmove" is a mapping of detected source dir -> destination dir renames.
627 This is needed for handling changes to new files previously grafted into
725 This is needed for handling changes to new files previously grafted into
628 renamed directories.
726 renamed directories.
629 """
727 """
630
728
631 def __init__(
729 def __init__(
632 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
730 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
633 ):
731 ):
634 self.copy = {} if copy is None else copy
732 self.copy = {} if copy is None else copy
635 self.renamedelete = {} if renamedelete is None else renamedelete
733 self.renamedelete = {} if renamedelete is None else renamedelete
636 self.dirmove = {} if dirmove is None else dirmove
734 self.dirmove = {} if dirmove is None else dirmove
637 self.movewithdir = {} if movewithdir is None else movewithdir
735 self.movewithdir = {} if movewithdir is None else movewithdir
638
736
639 def __repr__(self):
737 def __repr__(self):
640 return (
738 return (
641 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
739 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
642 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
740 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
643 )
741 )
644
742
645
743
646 def _fullcopytracing(repo, c1, c2, base):
744 def _fullcopytracing(repo, c1, c2, base):
647 """ The full copytracing algorithm which finds all the new files that were
745 """ The full copytracing algorithm which finds all the new files that were
648 added from merge base up to the top commit and for each file it checks if
746 added from merge base up to the top commit and for each file it checks if
649 this file was copied from another file.
747 this file was copied from another file.
650
748
651 This is pretty slow when a lot of changesets are involved but will track all
749 This is pretty slow when a lot of changesets are involved but will track all
652 the copies.
750 the copies.
653 """
751 """
654 m1 = c1.manifest()
752 m1 = c1.manifest()
655 m2 = c2.manifest()
753 m2 = c2.manifest()
656 mb = base.manifest()
754 mb = base.manifest()
657
755
658 copies1 = pathcopies(base, c1)
756 copies1 = pathcopies(base, c1)
659 copies2 = pathcopies(base, c2)
757 copies2 = pathcopies(base, c2)
660
758
661 if not (copies1 or copies2):
759 if not (copies1 or copies2):
662 return branch_copies(), branch_copies(), {}
760 return branch_copies(), branch_copies(), {}
663
761
664 inversecopies1 = {}
762 inversecopies1 = {}
665 inversecopies2 = {}
763 inversecopies2 = {}
666 for dst, src in copies1.items():
764 for dst, src in copies1.items():
667 inversecopies1.setdefault(src, []).append(dst)
765 inversecopies1.setdefault(src, []).append(dst)
668 for dst, src in copies2.items():
766 for dst, src in copies2.items():
669 inversecopies2.setdefault(src, []).append(dst)
767 inversecopies2.setdefault(src, []).append(dst)
670
768
671 copy1 = {}
769 copy1 = {}
672 copy2 = {}
770 copy2 = {}
673 diverge = {}
771 diverge = {}
674 renamedelete1 = {}
772 renamedelete1 = {}
675 renamedelete2 = {}
773 renamedelete2 = {}
676 allsources = set(inversecopies1) | set(inversecopies2)
774 allsources = set(inversecopies1) | set(inversecopies2)
677 for src in allsources:
775 for src in allsources:
678 dsts1 = inversecopies1.get(src)
776 dsts1 = inversecopies1.get(src)
679 dsts2 = inversecopies2.get(src)
777 dsts2 = inversecopies2.get(src)
680 if dsts1 and dsts2:
778 if dsts1 and dsts2:
681 # copied/renamed on both sides
779 # copied/renamed on both sides
682 if src not in m1 and src not in m2:
780 if src not in m1 and src not in m2:
683 # renamed on both sides
781 # renamed on both sides
684 dsts1 = set(dsts1)
782 dsts1 = set(dsts1)
685 dsts2 = set(dsts2)
783 dsts2 = set(dsts2)
686 # If there's some overlap in the rename destinations, we
784 # If there's some overlap in the rename destinations, we
687 # consider it not divergent. For example, if side 1 copies 'a'
785 # consider it not divergent. For example, if side 1 copies 'a'
688 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
786 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
689 # and 'd' and deletes 'a'.
787 # and 'd' and deletes 'a'.
690 if dsts1 & dsts2:
788 if dsts1 & dsts2:
691 for dst in dsts1 & dsts2:
789 for dst in dsts1 & dsts2:
692 copy1[dst] = src
790 copy1[dst] = src
693 copy2[dst] = src
791 copy2[dst] = src
694 else:
792 else:
695 diverge[src] = sorted(dsts1 | dsts2)
793 diverge[src] = sorted(dsts1 | dsts2)
696 elif src in m1 and src in m2:
794 elif src in m1 and src in m2:
697 # copied on both sides
795 # copied on both sides
698 dsts1 = set(dsts1)
796 dsts1 = set(dsts1)
699 dsts2 = set(dsts2)
797 dsts2 = set(dsts2)
700 for dst in dsts1 & dsts2:
798 for dst in dsts1 & dsts2:
701 copy1[dst] = src
799 copy1[dst] = src
702 copy2[dst] = src
800 copy2[dst] = src
703 # TODO: Handle cases where it was renamed on one side and copied
801 # TODO: Handle cases where it was renamed on one side and copied
704 # on the other side
802 # on the other side
705 elif dsts1:
803 elif dsts1:
706 # copied/renamed only on side 1
804 # copied/renamed only on side 1
707 _checksinglesidecopies(
805 _checksinglesidecopies(
708 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
806 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
709 )
807 )
710 elif dsts2:
808 elif dsts2:
711 # copied/renamed only on side 2
809 # copied/renamed only on side 2
712 _checksinglesidecopies(
810 _checksinglesidecopies(
713 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
811 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
714 )
812 )
715
813
716 # find interesting file sets from manifests
814 # find interesting file sets from manifests
717 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
815 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
718 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
816 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
719 u1 = sorted(addedinm1 - addedinm2)
817 u1 = sorted(addedinm1 - addedinm2)
720 u2 = sorted(addedinm2 - addedinm1)
818 u2 = sorted(addedinm2 - addedinm1)
721
819
722 header = b" unmatched files in %s"
820 header = b" unmatched files in %s"
723 if u1:
821 if u1:
724 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
822 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
725 if u2:
823 if u2:
726 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
824 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
727
825
728 if repo.ui.debugflag:
826 if repo.ui.debugflag:
729 renamedeleteset = set()
827 renamedeleteset = set()
730 divergeset = set()
828 divergeset = set()
731 for dsts in diverge.values():
829 for dsts in diverge.values():
732 divergeset.update(dsts)
830 divergeset.update(dsts)
733 for dsts in renamedelete1.values():
831 for dsts in renamedelete1.values():
734 renamedeleteset.update(dsts)
832 renamedeleteset.update(dsts)
735 for dsts in renamedelete2.values():
833 for dsts in renamedelete2.values():
736 renamedeleteset.update(dsts)
834 renamedeleteset.update(dsts)
737
835
738 repo.ui.debug(
836 repo.ui.debug(
739 b" all copies found (* = to merge, ! = divergent, "
837 b" all copies found (* = to merge, ! = divergent, "
740 b"% = renamed and deleted):\n"
838 b"% = renamed and deleted):\n"
741 )
839 )
742 for side, copies in ((b"local", copies1), (b"remote", copies2)):
840 for side, copies in ((b"local", copies1), (b"remote", copies2)):
743 if not copies:
841 if not copies:
744 continue
842 continue
745 repo.ui.debug(b" on %s side:\n" % side)
843 repo.ui.debug(b" on %s side:\n" % side)
746 for f in sorted(copies):
844 for f in sorted(copies):
747 note = b""
845 note = b""
748 if f in copy1 or f in copy2:
846 if f in copy1 or f in copy2:
749 note += b"*"
847 note += b"*"
750 if f in divergeset:
848 if f in divergeset:
751 note += b"!"
849 note += b"!"
752 if f in renamedeleteset:
850 if f in renamedeleteset:
753 note += b"%"
851 note += b"%"
754 repo.ui.debug(
852 repo.ui.debug(
755 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
853 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
756 )
854 )
757 del renamedeleteset
855 del renamedeleteset
758 del divergeset
856 del divergeset
759
857
760 repo.ui.debug(b" checking for directory renames\n")
858 repo.ui.debug(b" checking for directory renames\n")
761
859
762 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
860 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
763 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
861 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
764
862
765 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
863 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
766 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
864 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
767
865
768 return branch_copies1, branch_copies2, diverge
866 return branch_copies1, branch_copies2, diverge
769
867
770
868
771 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
869 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
772 """Finds moved directories and files that should move with them.
870 """Finds moved directories and files that should move with them.
773
871
774 ctx: the context for one of the sides
872 ctx: the context for one of the sides
775 copy: files copied on the same side (as ctx)
873 copy: files copied on the same side (as ctx)
776 fullcopy: files copied on the same side (as ctx), including those that
874 fullcopy: files copied on the same side (as ctx), including those that
777 merge.manifestmerge() won't care about
875 merge.manifestmerge() won't care about
778 addedfiles: added files on the other side (compared to ctx)
876 addedfiles: added files on the other side (compared to ctx)
779 """
877 """
780 # generate a directory move map
878 # generate a directory move map
781 d = ctx.dirs()
879 d = ctx.dirs()
782 invalid = set()
880 invalid = set()
783 dirmove = {}
881 dirmove = {}
784
882
785 # examine each file copy for a potential directory move, which is
883 # examine each file copy for a potential directory move, which is
786 # when all the files in a directory are moved to a new directory
884 # when all the files in a directory are moved to a new directory
787 for dst, src in pycompat.iteritems(fullcopy):
885 for dst, src in pycompat.iteritems(fullcopy):
788 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
886 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
789 if dsrc in invalid:
887 if dsrc in invalid:
790 # already seen to be uninteresting
888 # already seen to be uninteresting
791 continue
889 continue
792 elif dsrc in d and ddst in d:
890 elif dsrc in d and ddst in d:
793 # directory wasn't entirely moved locally
891 # directory wasn't entirely moved locally
794 invalid.add(dsrc)
892 invalid.add(dsrc)
795 elif dsrc in dirmove and dirmove[dsrc] != ddst:
893 elif dsrc in dirmove and dirmove[dsrc] != ddst:
796 # files from the same directory moved to two different places
894 # files from the same directory moved to two different places
797 invalid.add(dsrc)
895 invalid.add(dsrc)
798 else:
896 else:
799 # looks good so far
897 # looks good so far
800 dirmove[dsrc] = ddst
898 dirmove[dsrc] = ddst
801
899
802 for i in invalid:
900 for i in invalid:
803 if i in dirmove:
901 if i in dirmove:
804 del dirmove[i]
902 del dirmove[i]
805 del d, invalid
903 del d, invalid
806
904
807 if not dirmove:
905 if not dirmove:
808 return {}, {}
906 return {}, {}
809
907
810 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
908 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
811
909
812 for d in dirmove:
910 for d in dirmove:
813 repo.ui.debug(
911 repo.ui.debug(
814 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
912 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
815 )
913 )
816
914
817 movewithdir = {}
915 movewithdir = {}
818 # check unaccounted nonoverlapping files against directory moves
916 # check unaccounted nonoverlapping files against directory moves
819 for f in addedfiles:
917 for f in addedfiles:
820 if f not in fullcopy:
918 if f not in fullcopy:
821 for d in dirmove:
919 for d in dirmove:
822 if f.startswith(d):
920 if f.startswith(d):
823 # new file added in a directory that was moved, move it
921 # new file added in a directory that was moved, move it
824 df = dirmove[d] + f[len(d) :]
922 df = dirmove[d] + f[len(d) :]
825 if df not in copy:
923 if df not in copy:
826 movewithdir[f] = df
924 movewithdir[f] = df
827 repo.ui.debug(
925 repo.ui.debug(
828 b" pending file src: '%s' -> dst: '%s'\n"
926 b" pending file src: '%s' -> dst: '%s'\n"
829 % (f, df)
927 % (f, df)
830 )
928 )
831 break
929 break
832
930
833 return dirmove, movewithdir
931 return dirmove, movewithdir
834
932
835
933
836 def _heuristicscopytracing(repo, c1, c2, base):
934 def _heuristicscopytracing(repo, c1, c2, base):
837 """ Fast copytracing using filename heuristics
935 """ Fast copytracing using filename heuristics
838
936
839 Assumes that moves or renames are of following two types:
937 Assumes that moves or renames are of following two types:
840
938
841 1) Inside a directory only (same directory name but different filenames)
939 1) Inside a directory only (same directory name but different filenames)
842 2) Move from one directory to another
940 2) Move from one directory to another
843 (same filenames but different directory names)
941 (same filenames but different directory names)
844
942
845 Works only when there are no merge commits in the "source branch".
943 Works only when there are no merge commits in the "source branch".
846 Source branch is commits from base up to c2 not including base.
944 Source branch is commits from base up to c2 not including base.
847
945
848 If merge is involved it fallbacks to _fullcopytracing().
946 If merge is involved it fallbacks to _fullcopytracing().
849
947
850 Can be used by setting the following config:
948 Can be used by setting the following config:
851
949
852 [experimental]
950 [experimental]
853 copytrace = heuristics
951 copytrace = heuristics
854
952
855 In some cases the copy/move candidates found by heuristics can be very large
953 In some cases the copy/move candidates found by heuristics can be very large
856 in number and that will make the algorithm slow. The number of possible
954 in number and that will make the algorithm slow. The number of possible
857 candidates to check can be limited by using the config
955 candidates to check can be limited by using the config
858 `experimental.copytrace.movecandidateslimit` which defaults to 100.
956 `experimental.copytrace.movecandidateslimit` which defaults to 100.
859 """
957 """
860
958
861 if c1.rev() is None:
959 if c1.rev() is None:
862 c1 = c1.p1()
960 c1 = c1.p1()
863 if c2.rev() is None:
961 if c2.rev() is None:
864 c2 = c2.p1()
962 c2 = c2.p1()
865
963
866 changedfiles = set()
964 changedfiles = set()
867 m1 = c1.manifest()
965 m1 = c1.manifest()
868 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
966 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
869 # If base is not in c2 branch, we switch to fullcopytracing
967 # If base is not in c2 branch, we switch to fullcopytracing
870 repo.ui.debug(
968 repo.ui.debug(
871 b"switching to full copytracing as base is not "
969 b"switching to full copytracing as base is not "
872 b"an ancestor of c2\n"
970 b"an ancestor of c2\n"
873 )
971 )
874 return _fullcopytracing(repo, c1, c2, base)
972 return _fullcopytracing(repo, c1, c2, base)
875
973
876 ctx = c2
974 ctx = c2
877 while ctx != base:
975 while ctx != base:
878 if len(ctx.parents()) == 2:
976 if len(ctx.parents()) == 2:
879 # To keep things simple let's not handle merges
977 # To keep things simple let's not handle merges
880 repo.ui.debug(b"switching to full copytracing because of merges\n")
978 repo.ui.debug(b"switching to full copytracing because of merges\n")
881 return _fullcopytracing(repo, c1, c2, base)
979 return _fullcopytracing(repo, c1, c2, base)
882 changedfiles.update(ctx.files())
980 changedfiles.update(ctx.files())
883 ctx = ctx.p1()
981 ctx = ctx.p1()
884
982
885 copies2 = {}
983 copies2 = {}
886 cp = _forwardcopies(base, c2)
984 cp = _forwardcopies(base, c2)
887 for dst, src in pycompat.iteritems(cp):
985 for dst, src in pycompat.iteritems(cp):
888 if src in m1:
986 if src in m1:
889 copies2[dst] = src
987 copies2[dst] = src
890
988
891 # file is missing if it isn't present in the destination, but is present in
989 # file is missing if it isn't present in the destination, but is present in
892 # the base and present in the source.
990 # the base and present in the source.
893 # Presence in the base is important to exclude added files, presence in the
991 # Presence in the base is important to exclude added files, presence in the
894 # source is important to exclude removed files.
992 # source is important to exclude removed files.
895 filt = lambda f: f not in m1 and f in base and f in c2
993 filt = lambda f: f not in m1 and f in base and f in c2
896 missingfiles = [f for f in changedfiles if filt(f)]
994 missingfiles = [f for f in changedfiles if filt(f)]
897
995
898 copies1 = {}
996 copies1 = {}
899 if missingfiles:
997 if missingfiles:
900 basenametofilename = collections.defaultdict(list)
998 basenametofilename = collections.defaultdict(list)
901 dirnametofilename = collections.defaultdict(list)
999 dirnametofilename = collections.defaultdict(list)
902
1000
903 for f in m1.filesnotin(base.manifest()):
1001 for f in m1.filesnotin(base.manifest()):
904 basename = os.path.basename(f)
1002 basename = os.path.basename(f)
905 dirname = os.path.dirname(f)
1003 dirname = os.path.dirname(f)
906 basenametofilename[basename].append(f)
1004 basenametofilename[basename].append(f)
907 dirnametofilename[dirname].append(f)
1005 dirnametofilename[dirname].append(f)
908
1006
909 for f in missingfiles:
1007 for f in missingfiles:
910 basename = os.path.basename(f)
1008 basename = os.path.basename(f)
911 dirname = os.path.dirname(f)
1009 dirname = os.path.dirname(f)
912 samebasename = basenametofilename[basename]
1010 samebasename = basenametofilename[basename]
913 samedirname = dirnametofilename[dirname]
1011 samedirname = dirnametofilename[dirname]
914 movecandidates = samebasename + samedirname
1012 movecandidates = samebasename + samedirname
915 # f is guaranteed to be present in c2, that's why
1013 # f is guaranteed to be present in c2, that's why
916 # c2.filectx(f) won't fail
1014 # c2.filectx(f) won't fail
917 f2 = c2.filectx(f)
1015 f2 = c2.filectx(f)
918 # we can have a lot of candidates which can slow down the heuristics
1016 # we can have a lot of candidates which can slow down the heuristics
919 # config value to limit the number of candidates moves to check
1017 # config value to limit the number of candidates moves to check
920 maxcandidates = repo.ui.configint(
1018 maxcandidates = repo.ui.configint(
921 b'experimental', b'copytrace.movecandidateslimit'
1019 b'experimental', b'copytrace.movecandidateslimit'
922 )
1020 )
923
1021
924 if len(movecandidates) > maxcandidates:
1022 if len(movecandidates) > maxcandidates:
925 repo.ui.status(
1023 repo.ui.status(
926 _(
1024 _(
927 b"skipping copytracing for '%s', more "
1025 b"skipping copytracing for '%s', more "
928 b"candidates than the limit: %d\n"
1026 b"candidates than the limit: %d\n"
929 )
1027 )
930 % (f, len(movecandidates))
1028 % (f, len(movecandidates))
931 )
1029 )
932 continue
1030 continue
933
1031
934 for candidate in movecandidates:
1032 for candidate in movecandidates:
935 f1 = c1.filectx(candidate)
1033 f1 = c1.filectx(candidate)
936 if _related(f1, f2):
1034 if _related(f1, f2):
937 # if there are a few related copies then we'll merge
1035 # if there are a few related copies then we'll merge
938 # changes into all of them. This matches the behaviour
1036 # changes into all of them. This matches the behaviour
939 # of upstream copytracing
1037 # of upstream copytracing
940 copies1[candidate] = f
1038 copies1[candidate] = f
941
1039
942 return branch_copies(copies1), branch_copies(copies2), {}
1040 return branch_copies(copies1), branch_copies(copies2), {}
943
1041
944
1042
945 def _related(f1, f2):
1043 def _related(f1, f2):
946 """return True if f1 and f2 filectx have a common ancestor
1044 """return True if f1 and f2 filectx have a common ancestor
947
1045
948 Walk back to common ancestor to see if the two files originate
1046 Walk back to common ancestor to see if the two files originate
949 from the same file. Since workingfilectx's rev() is None it messes
1047 from the same file. Since workingfilectx's rev() is None it messes
950 up the integer comparison logic, hence the pre-step check for
1048 up the integer comparison logic, hence the pre-step check for
951 None (f1 and f2 can only be workingfilectx's initially).
1049 None (f1 and f2 can only be workingfilectx's initially).
952 """
1050 """
953
1051
954 if f1 == f2:
1052 if f1 == f2:
955 return True # a match
1053 return True # a match
956
1054
957 g1, g2 = f1.ancestors(), f2.ancestors()
1055 g1, g2 = f1.ancestors(), f2.ancestors()
958 try:
1056 try:
959 f1r, f2r = f1.linkrev(), f2.linkrev()
1057 f1r, f2r = f1.linkrev(), f2.linkrev()
960
1058
961 if f1r is None:
1059 if f1r is None:
962 f1 = next(g1)
1060 f1 = next(g1)
963 if f2r is None:
1061 if f2r is None:
964 f2 = next(g2)
1062 f2 = next(g2)
965
1063
966 while True:
1064 while True:
967 f1r, f2r = f1.linkrev(), f2.linkrev()
1065 f1r, f2r = f1.linkrev(), f2.linkrev()
968 if f1r > f2r:
1066 if f1r > f2r:
969 f1 = next(g1)
1067 f1 = next(g1)
970 elif f2r > f1r:
1068 elif f2r > f1r:
971 f2 = next(g2)
1069 f2 = next(g2)
972 else: # f1 and f2 point to files in the same linkrev
1070 else: # f1 and f2 point to files in the same linkrev
973 return f1 == f2 # true if they point to the same file
1071 return f1 == f2 # true if they point to the same file
974 except StopIteration:
1072 except StopIteration:
975 return False
1073 return False
976
1074
977
1075
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # copies between base and ctx only; wctx-side copies are ignored
    copies_to_apply = pathcopies(base, ctx)
    # drop copies that are invalid when chained onto the working copy
    _filter(wctx.p1(), wctx, copies_to_apply)
    for dest, source in pycompat.iteritems(copies_to_apply):
        wctx[dest].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now