##// END OF EJS Templates
copies: implement __repr__ on branch_copies for debugging...
Martin von Zweigbergk -
r45528:cfd06649 default
parent child Browse files
Show More
@@ -1,991 +1,997 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import os
11 import os
12
12
13 from .i18n import _
13 from .i18n import _
14
14
15
15
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
16 from .revlogutils.flagutil import REVIDX_SIDEDATA
17
17
18 from . import (
18 from . import (
19 match as matchmod,
19 match as matchmod,
20 node,
20 node,
21 pathutil,
21 pathutil,
22 pycompat,
22 pycompat,
23 util,
23 util,
24 )
24 )
25
25
26
26
27 from .utils import stringutil
27 from .utils import stringutil
28
28
29
29
30 def _filter(src, dst, t):
30 def _filter(src, dst, t):
31 """filters out invalid copies after chaining"""
31 """filters out invalid copies after chaining"""
32
32
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 # in the following table (not including trivial cases). For example, case 2
35 # in the following table (not including trivial cases). For example, case 2
36 # is where a file existed in 'src' and remained under that name in 'mid' and
36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 # then was renamed between 'mid' and 'dst'.
37 # then was renamed between 'mid' and 'dst'.
38 #
38 #
39 # case src mid dst result
39 # case src mid dst result
40 # 1 x y - -
40 # 1 x y - -
41 # 2 x y y x->y
41 # 2 x y y x->y
42 # 3 x y x -
42 # 3 x y x -
43 # 4 x y z x->z
43 # 4 x y z x->z
44 # 5 - x y -
44 # 5 - x y -
45 # 6 x x y x->y
45 # 6 x x y x->y
46 #
46 #
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 # between 5 and 6, so it includes all cases in its result.
49 # between 5 and 6, so it includes all cases in its result.
50 # Cases 1, 3, and 5 are then removed by _filter().
50 # Cases 1, 3, and 5 are then removed by _filter().
51
51
52 for k, v in list(t.items()):
52 for k, v in list(t.items()):
53 # remove copies from files that didn't exist
53 # remove copies from files that didn't exist
54 if v not in src:
54 if v not in src:
55 del t[k]
55 del t[k]
56 # remove criss-crossed copies
56 # remove criss-crossed copies
57 elif k in src and v in dst:
57 elif k in src and v in dst:
58 del t[k]
58 del t[k]
59 # remove copies to files that were then removed
59 # remove copies to files that were then removed
60 elif k not in dst:
60 elif k not in dst:
61 del t[k]
61 del t[k]
62
62
63
63
def _chain(prefix, suffix):
    """Chain two sets of copies 'prefix' and 'suffix'.

    Each destination in ``suffix`` is resolved through ``prefix`` so the
    returned mapping traces copies all the way back to the start of
    ``prefix``.
    """
    chained = prefix.copy()
    for dest, src in pycompat.iteritems(suffix):
        # if this copy's source was itself a copy destination, follow it back
        chained[dest] = prefix.get(src, src)
    return chained
def _tracefile(fctx, am, basemf):
    """return file context that is the ancestor of fctx present in ancestor
    manifest am

    Returns the path (or None implicitly when no ancestor matches).

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. we are better off
    disabling that feature."""

    # Walk the file's history: the first ancestor whose (path, filenode)
    # pair appears in the ancestor manifest (or in the optional base
    # manifest) is reported as the rename source.
    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path
def _dirstatecopies(repo, match=None):
    """Return the {dst: src} copies recorded in the dirstate.

    Entries are dropped when the destination file's dirstate status is not
    one of added/normal/merged ('a', 'n', 'm'), or when it is rejected by
    the optional *match*.
    """
    ds = repo.dirstate
    # copy the mapping so callers can mutate the result freely
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c
97 def _computeforwardmissing(a, b, match=None):
97 def _computeforwardmissing(a, b, match=None):
98 """Computes which files are in b but not a.
98 """Computes which files are in b but not a.
99 This is its own function so extensions can easily wrap this call to see what
99 This is its own function so extensions can easily wrap this call to see what
100 files _forwardcopies is about to process.
100 files _forwardcopies is about to process.
101 """
101 """
102 ma = a.manifest()
102 ma = a.manifest()
103 mb = b.manifest()
103 mb = b.manifest()
104 return mb.filesnotin(ma, match=match)
104 return mb.filesnotin(ma, match=match)
105
105
106
106
def usechangesetcentricalgo(repo):
    """Return True when changeset-centric copy algorithms should be used."""
    # sidedata storage always implies the changeset-centric algorithm
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    # otherwise honour the experimental read-from configuration
    source = repo.ui.config(b'experimental', b'copies.read-from')
    return source in (b'changeset-only', b'compatibility')
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    # changeset-centric algorithms read copy metadata from the changesets
    # themselves instead of from filelogs
    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry cache, assigned onto each fctx below so repeated
    # ancestor walks reuse it
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
def _revinfogetter(repo):
    """return a function that returns multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # bind the changectx once; the returned closure is queried per path
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            # a filelog entry with two real parents means the file was merged
            return nb_parents >= 2

        return ismerged

    if repo.filecopiesmode == b'changeset-sidedata':
        changelogrevision = cl.changelogrevision
        flags = cl.flags

        # A small cache to avoid doing the work twice for merges
        #
        # In the vast majority of cases, if we ask information for a revision
        # about 1 parent, we'll later ask it for the other. So it makes sense
        # to keep the information around when reaching the first parent of a
        # merge and dropping it after it was provided for the second parent.
        #
        # There exist cases where only one parent of the merge will be walked.
        # It happens when the "destination" of the copy tracing is descendant
        # from a new root, not common with the "source". In that case, we will
        # only walk through merge parents that are descendant of changesets
        # common between "source" and "destination".
        #
        # With the current implementation if such changesets have copy
        # information, we'll keep them in memory until the end of
        # _changesetforwardcopies. We don't expect the case to be frequent
        # enough to matter.
        #
        # In addition, it would be possible to reach a pathological case where
        # many first parents are met before any second parent is reached. In
        # that case the cache could grow. If this ever becomes an issue one can
        # safely introduce a maximum cache size. This would trade extra CPU/IO
        # time to save memory.
        merge_caches = {}

        def revinfo(rev):
            p1, p2 = parents(rev)
            value = None
            if flags(rev) & REVIDX_SIDEDATA:
                # serve (and consume) a previously cached merge entry
                e = merge_caches.pop(rev, None)
                if e is not None:
                    return e
                c = changelogrevision(rev)
                p1copies = c.p1copies
                p2copies = c.p2copies
                removed = c.filesremoved
                if p1 != node.nullrev and p2 != node.nullrev:
                    # XXX some case we over cache, IGNORE
                    value = merge_caches[rev] = (
                        p1,
                        p2,
                        p1copies,
                        p2copies,
                        removed,
                        get_ismerged(rev),
                    )
            else:
                # no sidedata: this revision carries no copy information
                p1copies = {}
                p2copies = {}
                removed = []

            if value is None:
                value = (p1, p2, p1copies, p2copies, removed, get_ismerged(rev))
            return value

    else:

        def revinfo(rev):
            # filelog-based mode: read copies from the context itself
            p1, p2 = parents(rev)
            ctx = repo[rev]
            p1copies, p2copies = ctx._copies
            removed = ctx.filesremoved()
            return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
def _changesetforwardcopies(a, b, match):
    """Changeset-centric version of the forward-copy computation.

    Returns a {dst@b: src@a} mapping derived from copy metadata stored in
    the changesets between ``a`` and ``b``.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}
    revinfo = _revinfogetter(repo)

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    # build a parent -> children mapping over the missing revisions and
    # collect the "roots": parents that are not themselves missing
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    # revisions on a path from any root to b.rev()
    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)
    return _combinechangesetcopies(
        revs, children, b.rev(), revinfo, match, isancestor
    )
def _combinechangesetcopies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
    match: a matcher
    isancestor(low_rev, high_rev): True if low_rev is an ancestor of high_rev

    It returns the aggregated copies information for `targetrev`.
    """
    # per-revision accumulated copies: {rev: {dst: (rev_of_copy, src_or_None)}}
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        # pop: once a revision is processed its entry is no longer needed
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through any earlier copy of the source
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # a None source marks the copy as invalidated by removal
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, ismerged
                    )
                    all_copies[c] = newcopies

    # keep only live copies (source still known) for the target revision
    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
395 def _merge_copies_dict(minor, major, isancestor, ismerged):
395 def _merge_copies_dict(minor, major, isancestor, ismerged):
396 """merge two copies-mapping together, minor and major
396 """merge two copies-mapping together, minor and major
397
397
398 In case of conflict, value from "major" will be picked.
398 In case of conflict, value from "major" will be picked.
399
399
400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
400 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
401 ancestors of `high_rev`,
401 ancestors of `high_rev`,
402
402
403 - `ismerged(path)`: callable return True if `path` have been merged in the
403 - `ismerged(path)`: callable return True if `path` have been merged in the
404 current revision,
404 current revision,
405 """
405 """
406 for dest, value in major.items():
406 for dest, value in major.items():
407 other = minor.get(dest)
407 other = minor.get(dest)
408 if other is None:
408 if other is None:
409 minor[dest] = value
409 minor[dest] = value
410 else:
410 else:
411 new_tt = value[0]
411 new_tt = value[0]
412 other_tt = other[0]
412 other_tt = other[0]
413 if value[1] == other[1]:
413 if value[1] == other[1]:
414 continue
414 continue
415 # content from "major" wins, unless it is older
415 # content from "major" wins, unless it is older
416 # than the branch point or there is a merge
416 # than the branch point or there is a merge
417 if (
417 if (
418 new_tt == other_tt
418 new_tt == other_tt
419 or not isancestor(new_tt, other_tt)
419 or not isancestor(new_tt, other_tt)
420 or ismerged(dest)
420 or ismerged(dest)
421 ):
421 ):
422 minor[dest] = value
422 minor[dest] = value
423
423
424
424
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b

    ``base`` defaults to ``a``; ``match`` is narrowed by the repository's
    narrowmatch before use.
    """

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        # trace up to b's first parent, then layer the dirstate copies on top
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a, as a {src@b: dst@a} map

    Copies (as opposed to renames) are dropped: only entries whose source
    no longer exists in ``a`` are kept.
    """
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: simple forward trace
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace renames backwards
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # x and y are on different branches: go back to the common
        # ancestor, then forward again
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    # drop chained entries that are invalid between x and y
    _filter(x, y, copies)
    return copies
501 def mergecopies(repo, c1, c2, base):
501 def mergecopies(repo, c1, c2, base):
502 """
502 """
503 Finds moves and copies between context c1 and c2 that are relevant for
503 Finds moves and copies between context c1 and c2 that are relevant for
504 merging. 'base' will be used as the merge base.
504 merging. 'base' will be used as the merge base.
505
505
506 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
506 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
507 files that were moved/ copied in one merge parent and modified in another.
507 files that were moved/ copied in one merge parent and modified in another.
508 For example:
508 For example:
509
509
510 o ---> 4 another commit
510 o ---> 4 another commit
511 |
511 |
512 | o ---> 3 commit that modifies a.txt
512 | o ---> 3 commit that modifies a.txt
513 | /
513 | /
514 o / ---> 2 commit that moves a.txt to b.txt
514 o / ---> 2 commit that moves a.txt to b.txt
515 |/
515 |/
516 o ---> 1 merge base
516 o ---> 1 merge base
517
517
518 If we try to rebase revision 3 on revision 4, since there is no a.txt in
518 If we try to rebase revision 3 on revision 4, since there is no a.txt in
519 revision 4, and if user have copytrace disabled, we prints the following
519 revision 4, and if user have copytrace disabled, we prints the following
520 message:
520 message:
521
521
522 ```other changed <file> which local deleted```
522 ```other changed <file> which local deleted```
523
523
524 Returns a tuple where:
524 Returns a tuple where:
525
525
526 "branch_copies" an instance of branch_copies.
526 "branch_copies" an instance of branch_copies.
527
527
528 "diverge" is a mapping of source name -> list of destination names
528 "diverge" is a mapping of source name -> list of destination names
529 for divergent renames.
529 for divergent renames.
530
530
531 This function calls different copytracing algorithms based on config.
531 This function calls different copytracing algorithms based on config.
532 """
532 """
533 # avoid silly behavior for update from empty dir
533 # avoid silly behavior for update from empty dir
534 if not c1 or not c2 or c1 == c2:
534 if not c1 or not c2 or c1 == c2:
535 return branch_copies(), branch_copies(), {}
535 return branch_copies(), branch_copies(), {}
536
536
537 narrowmatch = c1.repo().narrowmatch()
537 narrowmatch = c1.repo().narrowmatch()
538
538
539 # avoid silly behavior for parent -> working dir
539 # avoid silly behavior for parent -> working dir
540 if c2.node() is None and c1.node() == repo.dirstate.p1():
540 if c2.node() is None and c1.node() == repo.dirstate.p1():
541 return (
541 return (
542 branch_copies(_dirstatecopies(repo, narrowmatch)),
542 branch_copies(_dirstatecopies(repo, narrowmatch)),
543 branch_copies(),
543 branch_copies(),
544 {},
544 {},
545 )
545 )
546
546
547 copytracing = repo.ui.config(b'experimental', b'copytrace')
547 copytracing = repo.ui.config(b'experimental', b'copytrace')
548 if stringutil.parsebool(copytracing) is False:
548 if stringutil.parsebool(copytracing) is False:
549 # stringutil.parsebool() returns None when it is unable to parse the
549 # stringutil.parsebool() returns None when it is unable to parse the
550 # value, so we should rely on making sure copytracing is on such cases
550 # value, so we should rely on making sure copytracing is on such cases
551 return branch_copies(), branch_copies(), {}
551 return branch_copies(), branch_copies(), {}
552
552
553 if usechangesetcentricalgo(repo):
553 if usechangesetcentricalgo(repo):
554 # The heuristics don't make sense when we need changeset-centric algos
554 # The heuristics don't make sense when we need changeset-centric algos
555 return _fullcopytracing(repo, c1, c2, base)
555 return _fullcopytracing(repo, c1, c2, base)
556
556
557 # Copy trace disabling is explicitly below the node == p1 logic above
557 # Copy trace disabling is explicitly below the node == p1 logic above
558 # because the logic above is required for a simple copy to be kept across a
558 # because the logic above is required for a simple copy to be kept across a
559 # rebase.
559 # rebase.
560 if copytracing == b'heuristics':
560 if copytracing == b'heuristics':
561 # Do full copytracing if only non-public revisions are involved as
561 # Do full copytracing if only non-public revisions are involved as
562 # that will be fast enough and will also cover the copies which could
562 # that will be fast enough and will also cover the copies which could
563 # be missed by heuristics
563 # be missed by heuristics
564 if _isfullcopytraceable(repo, c1, base):
564 if _isfullcopytraceable(repo, c1, base):
565 return _fullcopytracing(repo, c1, c2, base)
565 return _fullcopytracing(repo, c1, c2, base)
566 return _heuristicscopytracing(repo, c1, c2, base)
566 return _heuristicscopytracing(repo, c1, c2, base)
567 else:
567 else:
568 return _fullcopytracing(repo, c1, c2, base)
568 return _fullcopytracing(repo, c1, c2, base)
569
569
570
570
571 def _isfullcopytraceable(repo, c1, base):
571 def _isfullcopytraceable(repo, c1, base):
572 """ Checks that if base, source and destination are all no-public branches,
572 """ Checks that if base, source and destination are all no-public branches,
573 if yes let's use the full copytrace algorithm for increased capabilities
573 if yes let's use the full copytrace algorithm for increased capabilities
574 since it will be fast enough.
574 since it will be fast enough.
575
575
576 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
576 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
577 number of changesets from c1 to base such that if number of changesets are
577 number of changesets from c1 to base such that if number of changesets are
578 more than the limit, full copytracing algorithm won't be used.
578 more than the limit, full copytracing algorithm won't be used.
579 """
579 """
580 if c1.rev() is None:
580 if c1.rev() is None:
581 c1 = c1.p1()
581 c1 = c1.p1()
582 if c1.mutable() and base.mutable():
582 if c1.mutable() and base.mutable():
583 sourcecommitlimit = repo.ui.configint(
583 sourcecommitlimit = repo.ui.configint(
584 b'experimental', b'copytrace.sourcecommitlimit'
584 b'experimental', b'copytrace.sourcecommitlimit'
585 )
585 )
586 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
586 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
587 return commits < sourcecommitlimit
587 return commits < sourcecommitlimit
588 return False
588 return False
589
589
590
590
591 def _checksinglesidecopies(
591 def _checksinglesidecopies(
592 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
592 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
593 ):
593 ):
594 if src not in m2:
594 if src not in m2:
595 # deleted on side 2
595 # deleted on side 2
596 if src not in m1:
596 if src not in m1:
597 # renamed on side 1, deleted on side 2
597 # renamed on side 1, deleted on side 2
598 renamedelete[src] = dsts1
598 renamedelete[src] = dsts1
599 elif src not in mb:
599 elif src not in mb:
600 # Work around the "short-circuit to avoid issues with merge states"
600 # Work around the "short-circuit to avoid issues with merge states"
601 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
601 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
602 # destination doesn't exist in y.
602 # destination doesn't exist in y.
603 pass
603 pass
604 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
604 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
605 return
605 return
606 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
606 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
607 # modified on side 2
607 # modified on side 2
608 for dst in dsts1:
608 for dst in dsts1:
609 copy[dst] = src
609 copy[dst] = src
610
610
611
611
612 class branch_copies(object):
612 class branch_copies(object):
613 """Information about copies made on one side of a merge/graft.
613 """Information about copies made on one side of a merge/graft.
614
614
615 "copy" is a mapping from destination name -> source name,
615 "copy" is a mapping from destination name -> source name,
616 where source is in c1 and destination is in c2 or vice-versa.
616 where source is in c1 and destination is in c2 or vice-versa.
617
617
618 "movewithdir" is a mapping from source name -> destination name,
618 "movewithdir" is a mapping from source name -> destination name,
619 where the file at source present in one context but not the other
619 where the file at source present in one context but not the other
620 needs to be moved to destination by the merge process, because the
620 needs to be moved to destination by the merge process, because the
621 other context moved the directory it is in.
621 other context moved the directory it is in.
622
622
623 "renamedelete" is a mapping of source name -> list of destination
623 "renamedelete" is a mapping of source name -> list of destination
624 names for files deleted in c1 that were renamed in c2 or vice-versa.
624 names for files deleted in c1 that were renamed in c2 or vice-versa.
625
625
626 "dirmove" is a mapping of detected source dir -> destination dir renames.
626 "dirmove" is a mapping of detected source dir -> destination dir renames.
627 This is needed for handling changes to new files previously grafted into
627 This is needed for handling changes to new files previously grafted into
628 renamed directories.
628 renamed directories.
629 """
629 """
630
630
631 def __init__(
631 def __init__(
632 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
632 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
633 ):
633 ):
634 self.copy = {} if copy is None else copy
634 self.copy = {} if copy is None else copy
635 self.renamedelete = {} if renamedelete is None else renamedelete
635 self.renamedelete = {} if renamedelete is None else renamedelete
636 self.dirmove = {} if dirmove is None else dirmove
636 self.dirmove = {} if dirmove is None else dirmove
637 self.movewithdir = {} if movewithdir is None else movewithdir
637 self.movewithdir = {} if movewithdir is None else movewithdir
638
638
639 def __repr__(self):
640 return (
641 '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
642 % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
643 )
644
639
645
640 def _fullcopytracing(repo, c1, c2, base):
646 def _fullcopytracing(repo, c1, c2, base):
641 """ The full copytracing algorithm which finds all the new files that were
647 """ The full copytracing algorithm which finds all the new files that were
642 added from merge base up to the top commit and for each file it checks if
648 added from merge base up to the top commit and for each file it checks if
643 this file was copied from another file.
649 this file was copied from another file.
644
650
645 This is pretty slow when a lot of changesets are involved but will track all
651 This is pretty slow when a lot of changesets are involved but will track all
646 the copies.
652 the copies.
647 """
653 """
648 m1 = c1.manifest()
654 m1 = c1.manifest()
649 m2 = c2.manifest()
655 m2 = c2.manifest()
650 mb = base.manifest()
656 mb = base.manifest()
651
657
652 copies1 = pathcopies(base, c1)
658 copies1 = pathcopies(base, c1)
653 copies2 = pathcopies(base, c2)
659 copies2 = pathcopies(base, c2)
654
660
655 if not (copies1 or copies2):
661 if not (copies1 or copies2):
656 return branch_copies(), branch_copies(), {}
662 return branch_copies(), branch_copies(), {}
657
663
658 inversecopies1 = {}
664 inversecopies1 = {}
659 inversecopies2 = {}
665 inversecopies2 = {}
660 for dst, src in copies1.items():
666 for dst, src in copies1.items():
661 inversecopies1.setdefault(src, []).append(dst)
667 inversecopies1.setdefault(src, []).append(dst)
662 for dst, src in copies2.items():
668 for dst, src in copies2.items():
663 inversecopies2.setdefault(src, []).append(dst)
669 inversecopies2.setdefault(src, []).append(dst)
664
670
665 copy1 = {}
671 copy1 = {}
666 copy2 = {}
672 copy2 = {}
667 diverge = {}
673 diverge = {}
668 renamedelete1 = {}
674 renamedelete1 = {}
669 renamedelete2 = {}
675 renamedelete2 = {}
670 allsources = set(inversecopies1) | set(inversecopies2)
676 allsources = set(inversecopies1) | set(inversecopies2)
671 for src in allsources:
677 for src in allsources:
672 dsts1 = inversecopies1.get(src)
678 dsts1 = inversecopies1.get(src)
673 dsts2 = inversecopies2.get(src)
679 dsts2 = inversecopies2.get(src)
674 if dsts1 and dsts2:
680 if dsts1 and dsts2:
675 # copied/renamed on both sides
681 # copied/renamed on both sides
676 if src not in m1 and src not in m2:
682 if src not in m1 and src not in m2:
677 # renamed on both sides
683 # renamed on both sides
678 dsts1 = set(dsts1)
684 dsts1 = set(dsts1)
679 dsts2 = set(dsts2)
685 dsts2 = set(dsts2)
680 # If there's some overlap in the rename destinations, we
686 # If there's some overlap in the rename destinations, we
681 # consider it not divergent. For example, if side 1 copies 'a'
687 # consider it not divergent. For example, if side 1 copies 'a'
682 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
688 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
683 # and 'd' and deletes 'a'.
689 # and 'd' and deletes 'a'.
684 if dsts1 & dsts2:
690 if dsts1 & dsts2:
685 for dst in dsts1 & dsts2:
691 for dst in dsts1 & dsts2:
686 copy1[dst] = src
692 copy1[dst] = src
687 copy2[dst] = src
693 copy2[dst] = src
688 else:
694 else:
689 diverge[src] = sorted(dsts1 | dsts2)
695 diverge[src] = sorted(dsts1 | dsts2)
690 elif src in m1 and src in m2:
696 elif src in m1 and src in m2:
691 # copied on both sides
697 # copied on both sides
692 dsts1 = set(dsts1)
698 dsts1 = set(dsts1)
693 dsts2 = set(dsts2)
699 dsts2 = set(dsts2)
694 for dst in dsts1 & dsts2:
700 for dst in dsts1 & dsts2:
695 copy1[dst] = src
701 copy1[dst] = src
696 copy2[dst] = src
702 copy2[dst] = src
697 # TODO: Handle cases where it was renamed on one side and copied
703 # TODO: Handle cases where it was renamed on one side and copied
698 # on the other side
704 # on the other side
699 elif dsts1:
705 elif dsts1:
700 # copied/renamed only on side 1
706 # copied/renamed only on side 1
701 _checksinglesidecopies(
707 _checksinglesidecopies(
702 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
708 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
703 )
709 )
704 elif dsts2:
710 elif dsts2:
705 # copied/renamed only on side 2
711 # copied/renamed only on side 2
706 _checksinglesidecopies(
712 _checksinglesidecopies(
707 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
713 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
708 )
714 )
709
715
710 # find interesting file sets from manifests
716 # find interesting file sets from manifests
711 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
717 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
712 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
718 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
713 u1 = sorted(addedinm1 - addedinm2)
719 u1 = sorted(addedinm1 - addedinm2)
714 u2 = sorted(addedinm2 - addedinm1)
720 u2 = sorted(addedinm2 - addedinm1)
715
721
716 header = b" unmatched files in %s"
722 header = b" unmatched files in %s"
717 if u1:
723 if u1:
718 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
724 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
719 if u2:
725 if u2:
720 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
726 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
721
727
722 if repo.ui.debugflag:
728 if repo.ui.debugflag:
723 renamedeleteset = set()
729 renamedeleteset = set()
724 divergeset = set()
730 divergeset = set()
725 for dsts in diverge.values():
731 for dsts in diverge.values():
726 divergeset.update(dsts)
732 divergeset.update(dsts)
727 for dsts in renamedelete1.values():
733 for dsts in renamedelete1.values():
728 renamedeleteset.update(dsts)
734 renamedeleteset.update(dsts)
729 for dsts in renamedelete2.values():
735 for dsts in renamedelete2.values():
730 renamedeleteset.update(dsts)
736 renamedeleteset.update(dsts)
731
737
732 repo.ui.debug(
738 repo.ui.debug(
733 b" all copies found (* = to merge, ! = divergent, "
739 b" all copies found (* = to merge, ! = divergent, "
734 b"% = renamed and deleted):\n"
740 b"% = renamed and deleted):\n"
735 )
741 )
736 for side, copies in ((b"local", copies1), (b"remote", copies2)):
742 for side, copies in ((b"local", copies1), (b"remote", copies2)):
737 if not copies:
743 if not copies:
738 continue
744 continue
739 repo.ui.debug(b" on %s side:\n" % side)
745 repo.ui.debug(b" on %s side:\n" % side)
740 for f in sorted(copies):
746 for f in sorted(copies):
741 note = b""
747 note = b""
742 if f in copy1 or f in copy2:
748 if f in copy1 or f in copy2:
743 note += b"*"
749 note += b"*"
744 if f in divergeset:
750 if f in divergeset:
745 note += b"!"
751 note += b"!"
746 if f in renamedeleteset:
752 if f in renamedeleteset:
747 note += b"%"
753 note += b"%"
748 repo.ui.debug(
754 repo.ui.debug(
749 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
755 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
750 )
756 )
751 del renamedeleteset
757 del renamedeleteset
752 del divergeset
758 del divergeset
753
759
754 repo.ui.debug(b" checking for directory renames\n")
760 repo.ui.debug(b" checking for directory renames\n")
755
761
756 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
762 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
757 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
763 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
758
764
759 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
765 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
760 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
766 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
761
767
762 return branch_copies1, branch_copies2, diverge
768 return branch_copies1, branch_copies2, diverge
763
769
764
770
765 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
771 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
766 """Finds moved directories and files that should move with them.
772 """Finds moved directories and files that should move with them.
767
773
768 ctx: the context for one of the sides
774 ctx: the context for one of the sides
769 copy: files copied on the same side (as ctx)
775 copy: files copied on the same side (as ctx)
770 fullcopy: files copied on the same side (as ctx), including those that
776 fullcopy: files copied on the same side (as ctx), including those that
771 merge.manifestmerge() won't care about
777 merge.manifestmerge() won't care about
772 addedfiles: added files on the other side (compared to ctx)
778 addedfiles: added files on the other side (compared to ctx)
773 """
779 """
774 # generate a directory move map
780 # generate a directory move map
775 d = ctx.dirs()
781 d = ctx.dirs()
776 invalid = set()
782 invalid = set()
777 dirmove = {}
783 dirmove = {}
778
784
779 # examine each file copy for a potential directory move, which is
785 # examine each file copy for a potential directory move, which is
780 # when all the files in a directory are moved to a new directory
786 # when all the files in a directory are moved to a new directory
781 for dst, src in pycompat.iteritems(fullcopy):
787 for dst, src in pycompat.iteritems(fullcopy):
782 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
788 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
783 if dsrc in invalid:
789 if dsrc in invalid:
784 # already seen to be uninteresting
790 # already seen to be uninteresting
785 continue
791 continue
786 elif dsrc in d and ddst in d:
792 elif dsrc in d and ddst in d:
787 # directory wasn't entirely moved locally
793 # directory wasn't entirely moved locally
788 invalid.add(dsrc)
794 invalid.add(dsrc)
789 elif dsrc in dirmove and dirmove[dsrc] != ddst:
795 elif dsrc in dirmove and dirmove[dsrc] != ddst:
790 # files from the same directory moved to two different places
796 # files from the same directory moved to two different places
791 invalid.add(dsrc)
797 invalid.add(dsrc)
792 else:
798 else:
793 # looks good so far
799 # looks good so far
794 dirmove[dsrc] = ddst
800 dirmove[dsrc] = ddst
795
801
796 for i in invalid:
802 for i in invalid:
797 if i in dirmove:
803 if i in dirmove:
798 del dirmove[i]
804 del dirmove[i]
799 del d, invalid
805 del d, invalid
800
806
801 if not dirmove:
807 if not dirmove:
802 return {}, {}
808 return {}, {}
803
809
804 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
810 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
805
811
806 for d in dirmove:
812 for d in dirmove:
807 repo.ui.debug(
813 repo.ui.debug(
808 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
814 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
809 )
815 )
810
816
811 movewithdir = {}
817 movewithdir = {}
812 # check unaccounted nonoverlapping files against directory moves
818 # check unaccounted nonoverlapping files against directory moves
813 for f in addedfiles:
819 for f in addedfiles:
814 if f not in fullcopy:
820 if f not in fullcopy:
815 for d in dirmove:
821 for d in dirmove:
816 if f.startswith(d):
822 if f.startswith(d):
817 # new file added in a directory that was moved, move it
823 # new file added in a directory that was moved, move it
818 df = dirmove[d] + f[len(d) :]
824 df = dirmove[d] + f[len(d) :]
819 if df not in copy:
825 if df not in copy:
820 movewithdir[f] = df
826 movewithdir[f] = df
821 repo.ui.debug(
827 repo.ui.debug(
822 b" pending file src: '%s' -> dst: '%s'\n"
828 b" pending file src: '%s' -> dst: '%s'\n"
823 % (f, df)
829 % (f, df)
824 )
830 )
825 break
831 break
826
832
827 return dirmove, movewithdir
833 return dirmove, movewithdir
828
834
829
835
830 def _heuristicscopytracing(repo, c1, c2, base):
836 def _heuristicscopytracing(repo, c1, c2, base):
831 """ Fast copytracing using filename heuristics
837 """ Fast copytracing using filename heuristics
832
838
833 Assumes that moves or renames are of following two types:
839 Assumes that moves or renames are of following two types:
834
840
835 1) Inside a directory only (same directory name but different filenames)
841 1) Inside a directory only (same directory name but different filenames)
836 2) Move from one directory to another
842 2) Move from one directory to another
837 (same filenames but different directory names)
843 (same filenames but different directory names)
838
844
839 Works only when there are no merge commits in the "source branch".
845 Works only when there are no merge commits in the "source branch".
840 Source branch is commits from base up to c2 not including base.
846 Source branch is commits from base up to c2 not including base.
841
847
842 If merge is involved it fallbacks to _fullcopytracing().
848 If merge is involved it fallbacks to _fullcopytracing().
843
849
844 Can be used by setting the following config:
850 Can be used by setting the following config:
845
851
846 [experimental]
852 [experimental]
847 copytrace = heuristics
853 copytrace = heuristics
848
854
849 In some cases the copy/move candidates found by heuristics can be very large
855 In some cases the copy/move candidates found by heuristics can be very large
850 in number and that will make the algorithm slow. The number of possible
856 in number and that will make the algorithm slow. The number of possible
851 candidates to check can be limited by using the config
857 candidates to check can be limited by using the config
852 `experimental.copytrace.movecandidateslimit` which defaults to 100.
858 `experimental.copytrace.movecandidateslimit` which defaults to 100.
853 """
859 """
854
860
855 if c1.rev() is None:
861 if c1.rev() is None:
856 c1 = c1.p1()
862 c1 = c1.p1()
857 if c2.rev() is None:
863 if c2.rev() is None:
858 c2 = c2.p1()
864 c2 = c2.p1()
859
865
860 changedfiles = set()
866 changedfiles = set()
861 m1 = c1.manifest()
867 m1 = c1.manifest()
862 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
868 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
863 # If base is not in c2 branch, we switch to fullcopytracing
869 # If base is not in c2 branch, we switch to fullcopytracing
864 repo.ui.debug(
870 repo.ui.debug(
865 b"switching to full copytracing as base is not "
871 b"switching to full copytracing as base is not "
866 b"an ancestor of c2\n"
872 b"an ancestor of c2\n"
867 )
873 )
868 return _fullcopytracing(repo, c1, c2, base)
874 return _fullcopytracing(repo, c1, c2, base)
869
875
870 ctx = c2
876 ctx = c2
871 while ctx != base:
877 while ctx != base:
872 if len(ctx.parents()) == 2:
878 if len(ctx.parents()) == 2:
873 # To keep things simple let's not handle merges
879 # To keep things simple let's not handle merges
874 repo.ui.debug(b"switching to full copytracing because of merges\n")
880 repo.ui.debug(b"switching to full copytracing because of merges\n")
875 return _fullcopytracing(repo, c1, c2, base)
881 return _fullcopytracing(repo, c1, c2, base)
876 changedfiles.update(ctx.files())
882 changedfiles.update(ctx.files())
877 ctx = ctx.p1()
883 ctx = ctx.p1()
878
884
879 copies2 = {}
885 copies2 = {}
880 cp = _forwardcopies(base, c2)
886 cp = _forwardcopies(base, c2)
881 for dst, src in pycompat.iteritems(cp):
887 for dst, src in pycompat.iteritems(cp):
882 if src in m1:
888 if src in m1:
883 copies2[dst] = src
889 copies2[dst] = src
884
890
885 # file is missing if it isn't present in the destination, but is present in
891 # file is missing if it isn't present in the destination, but is present in
886 # the base and present in the source.
892 # the base and present in the source.
887 # Presence in the base is important to exclude added files, presence in the
893 # Presence in the base is important to exclude added files, presence in the
888 # source is important to exclude removed files.
894 # source is important to exclude removed files.
889 filt = lambda f: f not in m1 and f in base and f in c2
895 filt = lambda f: f not in m1 and f in base and f in c2
890 missingfiles = [f for f in changedfiles if filt(f)]
896 missingfiles = [f for f in changedfiles if filt(f)]
891
897
892 copies1 = {}
898 copies1 = {}
893 if missingfiles:
899 if missingfiles:
894 basenametofilename = collections.defaultdict(list)
900 basenametofilename = collections.defaultdict(list)
895 dirnametofilename = collections.defaultdict(list)
901 dirnametofilename = collections.defaultdict(list)
896
902
897 for f in m1.filesnotin(base.manifest()):
903 for f in m1.filesnotin(base.manifest()):
898 basename = os.path.basename(f)
904 basename = os.path.basename(f)
899 dirname = os.path.dirname(f)
905 dirname = os.path.dirname(f)
900 basenametofilename[basename].append(f)
906 basenametofilename[basename].append(f)
901 dirnametofilename[dirname].append(f)
907 dirnametofilename[dirname].append(f)
902
908
903 for f in missingfiles:
909 for f in missingfiles:
904 basename = os.path.basename(f)
910 basename = os.path.basename(f)
905 dirname = os.path.dirname(f)
911 dirname = os.path.dirname(f)
906 samebasename = basenametofilename[basename]
912 samebasename = basenametofilename[basename]
907 samedirname = dirnametofilename[dirname]
913 samedirname = dirnametofilename[dirname]
908 movecandidates = samebasename + samedirname
914 movecandidates = samebasename + samedirname
909 # f is guaranteed to be present in c2, that's why
915 # f is guaranteed to be present in c2, that's why
910 # c2.filectx(f) won't fail
916 # c2.filectx(f) won't fail
911 f2 = c2.filectx(f)
917 f2 = c2.filectx(f)
912 # we can have a lot of candidates which can slow down the heuristics
918 # we can have a lot of candidates which can slow down the heuristics
913 # config value to limit the number of candidates moves to check
919 # config value to limit the number of candidates moves to check
914 maxcandidates = repo.ui.configint(
920 maxcandidates = repo.ui.configint(
915 b'experimental', b'copytrace.movecandidateslimit'
921 b'experimental', b'copytrace.movecandidateslimit'
916 )
922 )
917
923
918 if len(movecandidates) > maxcandidates:
924 if len(movecandidates) > maxcandidates:
919 repo.ui.status(
925 repo.ui.status(
920 _(
926 _(
921 b"skipping copytracing for '%s', more "
927 b"skipping copytracing for '%s', more "
922 b"candidates than the limit: %d\n"
928 b"candidates than the limit: %d\n"
923 )
929 )
924 % (f, len(movecandidates))
930 % (f, len(movecandidates))
925 )
931 )
926 continue
932 continue
927
933
928 for candidate in movecandidates:
934 for candidate in movecandidates:
929 f1 = c1.filectx(candidate)
935 f1 = c1.filectx(candidate)
930 if _related(f1, f2):
936 if _related(f1, f2):
931 # if there are a few related copies then we'll merge
937 # if there are a few related copies then we'll merge
932 # changes into all of them. This matches the behaviour
938 # changes into all of them. This matches the behaviour
933 # of upstream copytracing
939 # of upstream copytracing
934 copies1[candidate] = f
940 copies1[candidate] = f
935
941
936 return branch_copies(copies1), branch_copies(copies2), {}
942 return branch_copies(copies1), branch_copies(copies2), {}
937
943
938
944
939 def _related(f1, f2):
945 def _related(f1, f2):
940 """return True if f1 and f2 filectx have a common ancestor
946 """return True if f1 and f2 filectx have a common ancestor
941
947
942 Walk back to common ancestor to see if the two files originate
948 Walk back to common ancestor to see if the two files originate
943 from the same file. Since workingfilectx's rev() is None it messes
949 from the same file. Since workingfilectx's rev() is None it messes
944 up the integer comparison logic, hence the pre-step check for
950 up the integer comparison logic, hence the pre-step check for
945 None (f1 and f2 can only be workingfilectx's initially).
951 None (f1 and f2 can only be workingfilectx's initially).
946 """
952 """
947
953
948 if f1 == f2:
954 if f1 == f2:
949 return True # a match
955 return True # a match
950
956
951 g1, g2 = f1.ancestors(), f2.ancestors()
957 g1, g2 = f1.ancestors(), f2.ancestors()
952 try:
958 try:
953 f1r, f2r = f1.linkrev(), f2.linkrev()
959 f1r, f2r = f1.linkrev(), f2.linkrev()
954
960
955 if f1r is None:
961 if f1r is None:
956 f1 = next(g1)
962 f1 = next(g1)
957 if f2r is None:
963 if f2r is None:
958 f2 = next(g2)
964 f2 = next(g2)
959
965
960 while True:
966 while True:
961 f1r, f2r = f1.linkrev(), f2.linkrev()
967 f1r, f2r = f1.linkrev(), f2.linkrev()
962 if f1r > f2r:
968 if f1r > f2r:
963 f1 = next(g1)
969 f1 = next(g1)
964 elif f2r > f1r:
970 elif f2r > f1r:
965 f2 = next(g2)
971 f2 = next(g2)
966 else: # f1 and f2 point to files in the same linkrev
972 else: # f1 and f2 point to files in the same linkrev
967 return f1 == f2 # true if they point to the same file
973 return f1 == f2 # true if they point to the same file
968 except StopIteration:
974 except StopIteration:
969 return False
975 return False
970
976
971
977
972 def graftcopies(wctx, ctx, base):
978 def graftcopies(wctx, ctx, base):
973 """reproduce copies between base and ctx in the wctx
979 """reproduce copies between base and ctx in the wctx
974
980
975 Unlike mergecopies(), this function will only consider copies between base
981 Unlike mergecopies(), this function will only consider copies between base
976 and ctx; it will ignore copies between base and wctx. Also unlike
982 and ctx; it will ignore copies between base and wctx. Also unlike
977 mergecopies(), this function will apply copies to the working copy (instead
983 mergecopies(), this function will apply copies to the working copy (instead
978 of just returning information about the copies). That makes it cheaper
984 of just returning information about the copies). That makes it cheaper
979 (especially in the common case of base==ctx.p1()) and useful also when
985 (especially in the common case of base==ctx.p1()) and useful also when
980 experimental.copytrace=off.
986 experimental.copytrace=off.
981
987
982 merge.update() will have already marked most copies, but it will only
988 merge.update() will have already marked most copies, but it will only
983 mark copies if it thinks the source files are related (see
989 mark copies if it thinks the source files are related (see
984 merge._related()). It will also not mark copies if the file wasn't modified
990 merge._related()). It will also not mark copies if the file wasn't modified
985 on the local side. This function adds the copies that were "missed"
991 on the local side. This function adds the copies that were "missed"
986 by merge.update().
992 by merge.update().
987 """
993 """
988 new_copies = pathcopies(base, ctx)
994 new_copies = pathcopies(base, ctx)
989 _filter(wctx.p1(), wctx, new_copies)
995 _filter(wctx.p1(), wctx, new_copies)
990 for dst, src in pycompat.iteritems(new_copies):
996 for dst, src in pycompat.iteritems(new_copies):
991 wctx[dst].markcopied(src)
997 wctx[dst].markcopied(src)
General Comments 0
You need to be logged in to leave comments. Login now