##// END OF EJS Templates
narrow: filter copies in core...
Martin von Zweigbergk -
r38057:ee7b6fa5 default
parent child Browse files
Show More
@@ -1,97 +1,95
1 # __init__.py - narrowhg extension
1 # __init__.py - narrowhg extension
2 #
2 #
3 # Copyright 2017 Google, Inc.
3 # Copyright 2017 Google, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 '''create clones which fetch history data for subset of files (EXPERIMENTAL)'''
7 '''create clones which fetch history data for subset of files (EXPERIMENTAL)'''
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
11 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
12 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
12 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
13 # be specifying the version(s) of Mercurial they are tested with, or
13 # be specifying the version(s) of Mercurial they are tested with, or
14 # leave the attribute unspecified.
14 # leave the attribute unspecified.
15 testedwith = 'ships-with-hg-core'
15 testedwith = 'ships-with-hg-core'
16
16
17 from mercurial import (
17 from mercurial import (
18 changegroup,
18 changegroup,
19 extensions,
19 extensions,
20 hg,
20 hg,
21 localrepo,
21 localrepo,
22 registrar,
22 registrar,
23 verify as verifymod,
23 verify as verifymod,
24 )
24 )
25
25
26 from . import (
26 from . import (
27 narrowbundle2,
27 narrowbundle2,
28 narrowchangegroup,
28 narrowchangegroup,
29 narrowcommands,
29 narrowcommands,
30 narrowcopies,
30 narrowcopies,
31 narrowdirstate,
31 narrowdirstate,
32 narrowmerge,
33 narrowpatch,
32 narrowpatch,
34 narrowrepo,
33 narrowrepo,
35 narrowrevlog,
34 narrowrevlog,
36 narrowtemplates,
35 narrowtemplates,
37 narrowwirepeer,
36 narrowwirepeer,
38 )
37 )
39
38
configtable = {}
configitem = registrar.configitem(configtable)

# Narrowhg *has* support for serving ellipsis nodes (which are used at
# least by Google's internal server), but that support is pretty
# fragile and has a lot of problems on real-world repositories that
# have complex graph topologies. This could probably be corrected, but
# absent someone needing the full support for ellipsis nodes in
# repositories with merges, it's unlikely this work will get done. As
# of this writing in late 2017, all repositories large enough for
# ellipsis nodes to be a hard requirement also enforce strictly linear
# history for other scaling reasons.
configitem('experimental', 'narrowservebrokenellipses',
           default=False,
           alias=[('narrow', 'serveellipses')],
)

# Export the commands table for Mercurial to see.
cmdtable = narrowcommands.table
58
57
def featuresetup(ui, features):
    """Advertise the narrow-clone repository requirement to localrepo."""
    features.add(changegroup.NARROW_REQUIREMENT)
61
60
def uisetup(ui):
    """Wraps user-facing mercurial commands with narrow-aware versions."""
    localrepo.featuresetupfuncs.add(featuresetup)
    # Install the narrow-aware wrappers provided by each submodule.
    narrowrevlog.setup()
    narrowbundle2.setup()
    narrowcommands.setup()
    narrowchangegroup.setup()
    narrowwirepeer.uisetup()
71
69
def reposetup(ui, repo):
    """Wraps local repositories with narrow repo support."""
    if not repo.local():
        # Only local repositories can be narrowed.
        return

    narrowrepo.wraprepo(repo)
    if changegroup.NARROW_REQUIREMENT in repo.requirements:
        # The per-repo wrappers only matter once the repo actually
        # carries the narrow requirement.
        narrowcopies.setup(repo)
        narrowdirstate.setup(repo)
        narrowpatch.setup(repo)
        narrowwirepeer.reposetup(repo)
83
81
84 def _verifierinit(orig, self, repo, matcher=None):
82 def _verifierinit(orig, self, repo, matcher=None):
85 # The verifier's matcher argument was desgined for narrowhg, so it should
83 # The verifier's matcher argument was desgined for narrowhg, so it should
86 # be None from core. If another extension passes a matcher (unlikely),
84 # be None from core. If another extension passes a matcher (unlikely),
87 # we'll have to fail until matchers can be composed more easily.
85 # we'll have to fail until matchers can be composed more easily.
88 assert matcher is None
86 assert matcher is None
89 orig(self, repo, repo.narrowmatch())
87 orig(self, repo, repo.narrowmatch())
90
88
def extsetup(ui):
    # Force verify to honor the narrowspec, and keep share/unshare aware
    # of the narrowspec file.
    extensions.wrapfunction(verifymod.verifier, '__init__', _verifierinit)
    extensions.wrapfunction(hg, 'postshare', narrowrepo.wrappostshare)
    extensions.wrapfunction(hg, 'copystore', narrowrepo.unsharenarrowspec)
95
93
# Re-export the template keywords and revset predicates defined by
# narrowtemplates so Mercurial's extension loader picks them up.
templatekeyword = narrowtemplates.templatekeyword
revsetpredicate = narrowtemplates.revsetpredicate
@@ -1,878 +1,883
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import collections
10 import collections
11 import heapq
11 import heapq
12 import os
12 import os
13
13
14 from .i18n import _
14 from .i18n import _
15
15
16 from . import (
16 from . import (
17 match as matchmod,
17 match as matchmod,
18 node,
18 node,
19 pathutil,
19 pathutil,
20 scmutil,
20 scmutil,
21 util,
21 util,
22 )
22 )
23
23
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct
    descendent of the other, in which case we can return the minimum revnum
    of a and b.
    None if no such revision exists.
    """
    # Strategy: walk ancestors in topological order with the help of a
    # heap of negated revnums, tagging each revision with the "side"
    # (-1 for a, 1 for b) it was first reached from.  A revision reached
    # from both sides is cleared (side 0) and proves a common ancestor
    # exists.  We count how many single-sided ("interesting") revisions
    # remain and stop once that drops to zero, tracking the lowest
    # single-sided revision seen.

    cl = repo.changelog
    working = len(cl)  # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                continue
            if p not in side:
                # first time we see p; it inherits r's side
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but is reachable from both sides
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r  # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it
    #    'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @  3 temporary amend commit for a1-amend
    # |
    # o  2 a1-amend
    # |
    # | o  1 a1
    # |/
    # o  0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be
    # 2, yet the filelog has the copy information in rev 1 and we will not
    # look back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
108
108
109 def _chain(src, dst, a, b):
109 def _chain(src, dst, a, b):
110 """chain two sets of copies a->b"""
110 """chain two sets of copies a->b"""
111 t = a.copy()
111 t = a.copy()
112 for k, v in b.iteritems():
112 for k, v in b.iteritems():
113 if v in t:
113 if v in t:
114 # found a chain
114 # found a chain
115 if t[v] != k:
115 if t[v] != k:
116 # file wasn't renamed back to itself
116 # file wasn't renamed back to itself
117 t[k] = t[v]
117 t[k] = t[v]
118 if v not in dst:
118 if v not in dst:
119 # chain was a rename, not a copy
119 # chain was a rename, not a copy
120 del t[v]
120 del t[v]
121 if v in src:
121 if v in src:
122 # file is a copy of an existing file
122 # file is a copy of an existing file
123 t[k] = v
123 t[k] = v
124
124
125 # remove criss-crossed copies
125 # remove criss-crossed copies
126 for k, v in list(t.items()):
126 for k, v in list(t.items()):
127 if k in src and v in dst:
127 if k in src and v in dst:
128 del t[k]
128 del t[k]
129
129
130 return t
130 return t
131
131
132 def _tracefile(fctx, am, limit=-1):
132 def _tracefile(fctx, am, limit=-1):
133 """return file context that is the ancestor of fctx present in ancestor
133 """return file context that is the ancestor of fctx present in ancestor
134 manifest am, stopping after the first ancestor lower than limit"""
134 manifest am, stopping after the first ancestor lower than limit"""
135
135
136 for f in fctx.ancestors():
136 for f in fctx.ancestors():
137 if am.get(f.path(), None) == f.filenode():
137 if am.get(f.path(), None) == f.filenode():
138 return f
138 return f
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
139 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
140 return None
140 return None
141
141
142 def _dirstatecopies(d, match=None):
142 def _dirstatecopies(d, match=None):
143 ds = d._repo.dirstate
143 ds = d._repo.dirstate
144 c = ds.copies().copy()
144 c = ds.copies().copy()
145 for k in list(c):
145 for k in list(c):
146 if ds[k] not in 'anm' or (match and not match(k)):
146 if ds[k] not in 'anm' or (match and not match(k)):
147 del c[k]
147 del c[k]
148 return c
148 return c
149
149
150 def _computeforwardmissing(a, b, match=None):
150 def _computeforwardmissing(a, b, match=None):
151 """Computes which files are in b but not a.
151 """Computes which files are in b but not a.
152 This is its own function so extensions can easily wrap this call to see what
152 This is its own function so extensions can easily wrap this call to see what
153 files _forwardcopies is about to process.
153 files _forwardcopies is about to process.
154 """
154 """
155 ma = a.manifest()
155 ma = a.manifest()
156 mb = b.manifest()
156 mb = b.manifest()
157 return mb.filesnotin(ma, match=match)
157 return mb.filesnotin(ma, match=match)
158
158
def _committedforwardcopies(a, b, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the
    # common use case of computing what copies are in a commit versus its
    # parent (like during a rebase or histedit). Note, we exclude merge
    # commits from this optimization, since the ctx.files() for a merge
    # commit is not correct for this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = scmutil.matchfiles(a._repo, b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files to keep linkrev
    # adjustment cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()
    return cm
193
193
def _forwardcopies(a, b, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
    if b.rev() is None:
        # b is the working copy
        if a == b.p1():
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(b, match)
        # otherwise trace to b's parent, then layer the dirstate copies
        # on top of the committed ones
        cm = _committedforwardcopies(a, b.p1(), match)
        return _chain(a, b, cm, _dirstatecopies(b, match))
    return _committedforwardcopies(a, b, match)
207
207
def _backwardrenames(a, b):
    """find renames from a back to its descendant b by inverting the
    forward copy mapping; copies (source still present in a) are dropped"""
    if a._repo.ui.config('experimental', 'copytrace') == 'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename
    # situations can still exist (e.g. hg cp a b; hg mv a c). In those
    # cases we arbitrarily pick one of the renames.
    # NOTE: f.iteritems() was replaced with f.items() for consistency with
    # _chain() in this module; identical on Python 2, works on Python 3.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
223
223
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    if x == y or not x or not y:
        # nothing to compare
        return {}
    a = y.ancestor(x)
    if a == x:
        # y descends from x: walk forwards only
        return _forwardcopies(x, y, match=match)
    if a == y:
        # x descends from y: walk backwards only
        return _backwardrenames(x, y)
    # otherwise go back from x to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, a),
                  _forwardcopies(a, y, match=match))
235
235
def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
    """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
    and c2. This is its own function so extensions can easily wrap this call
    to see what files mergecopies is about to process.

    Even though c1 and c2 are not used in this function, they are useful in
    other extensions for being able to read the file nodes of the changed
    files.

    "baselabel" can be passed to help distinguish the multiple computations
    done in the graft case.
    """
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = "  unmatched files in %s"
    if baselabel:
        header += ' (from %s)' % baselabel
    if u1:
        repo.ui.debug("%s:\n   %s\n" % (header % 'local', "\n   ".join(u1)))
    if u2:
        repo.ui.debug("%s:\n   %s\n" % (header % 'other', "\n   ".join(u2)))

    # restrict the result to the narrowspec: files outside the narrow
    # clone cannot take part in copy tracing
    narrowmatch = repo.narrowmatch()
    if not narrowmatch.always():
        u1 = [f for f in u1 if narrowmatch(f)]
        u2 = [f for f in u2 if narrowmatch(f)]
    return u1, u2
258
263
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # build (and cache on ctx) the ancestry context once
        revs = [rev]
        if rev is None:
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        if n in node.wdirfilenodeids:  # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    return util.lrucachefunc(makectx)
293
298
294 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
299 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
295 """combine partial copy paths"""
300 """combine partial copy paths"""
296 remainder = {}
301 remainder = {}
297 for f in copyfrom:
302 for f in copyfrom:
298 if f in copyto:
303 if f in copyto:
299 finalcopy[copyto[f]] = copyfrom[f]
304 finalcopy[copyto[f]] = copyfrom[f]
300 del copyto[f]
305 del copyto[f]
301 for f in incompletediverge:
306 for f in incompletediverge:
302 assert f not in diverge
307 assert f not in diverge
303 ic = incompletediverge[f]
308 ic = incompletediverge[f]
304 if ic[0] in copyto:
309 if ic[0] in copyto:
305 diverge[f] = [copyto[ic[0]], ic[1]]
310 diverge[f] = [copyto[ic[0]], ic[1]]
306 else:
311 else:
307 remainder[f] = ic
312 remainder[f] = ic
308 return remainder
313 return remainder
309
314
def mergecopies(repo, c1, c2, base):
    """
    Dispatch to one of the copytracing algorithms, chosen by configuration,
    to find moves and copies between context c1 and c2 that are relevant
    for merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to
    merge files that were moved/ copied in one merge parent and modified in
    another. For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if user have copytrace disabled, we prints the following
    message:

    ```other changed <file> which local deleted```

    Returns five dicts: "copy", "movewithdir", "diverge", "renamedelete" and
    "dirmove".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir
    renames. This is needed for handling changes to new files previously
    grafted into renamed directories.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}, {}

    copytracing = repo.ui.config('experimental', 'copytrace')

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept
    # across a rebase.
    if copytracing == 'off':
        return {}, {}, {}, {}, {}
    elif copytracing == 'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which
        # could be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
379
384
380 def _isfullcopytraceable(repo, c1, base):
385 def _isfullcopytraceable(repo, c1, base):
381 """ Checks that if base, source and destination are all no-public branches,
386 """ Checks that if base, source and destination are all no-public branches,
382 if yes let's use the full copytrace algorithm for increased capabilities
387 if yes let's use the full copytrace algorithm for increased capabilities
383 since it will be fast enough.
388 since it will be fast enough.
384
389
385 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
390 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
386 number of changesets from c1 to base such that if number of changesets are
391 number of changesets from c1 to base such that if number of changesets are
387 more than the limit, full copytracing algorithm won't be used.
392 more than the limit, full copytracing algorithm won't be used.
388 """
393 """
389 if c1.rev() is None:
394 if c1.rev() is None:
390 c1 = c1.p1()
395 c1 = c1.p1()
391 if c1.mutable() and base.mutable():
396 if c1.mutable() and base.mutable():
392 sourcecommitlimit = repo.ui.configint('experimental',
397 sourcecommitlimit = repo.ui.configint('experimental',
393 'copytrace.sourcecommitlimit')
398 'copytrace.sourcecommitlimit')
394 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
399 commits = len(repo.revs('%d::%d', base.rev(), c1.rev()))
395 return commits < sourcecommitlimit
400 return commits < sourcecommitlimit
396 return False
401 return False
397
402
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track
    all the copies.

    Returns the same five dicts as mergecopies(): "copy", "movewithdir",
    "diverge", "renamedelete" and "dirmove".
    """
    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge.  Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    # - incomplete = record non-divergent partial copies here
    # - incompletediverge = record divergent partial copies here
    diverge = {} # divergence data is shared
    incompletediverge = {}
    data1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': diverge,
             'incompletediverge': incompletediverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    bothnew = sorted(addedinm1 & addedinm2)
    if tca == base:
        # unmatched file from base
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
        u1u, u2u = u1r, u2r
    else:
        # unmatched file from base (DAG rotation in the graft case)
        u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
                                      baselabel='base')
        # unmatched file from topological common ancestors (no DAG rotation)
        # need to recompute this for directory move handling when grafting
        mta = tca.manifest()
        u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
                                      m2.filesnotin(mta),
                                      baselabel='topological common ancestor')

    for f in u1u:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, data1)

    for f in u2u:
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, data2)

    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    # fold each side's partial copies into the shared diverge/copy maps;
    # the "dirty" side's incompletes take the second (graft) role
    if dirtyc1:
        _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
                       incompletediverge)
    else:
        _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
                       incompletediverge)

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot because entries are deleted during the loop
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothincompletediverge = {}
    remainder = {}
    both1 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    both2 = {'copy': {},
             'fullcopy': {},
             'incomplete': {},
             'diverge': bothdiverge,
             'incompletediverge': bothincompletediverge
            }
    # files added on both sides may still be renames of a common source
    for f in bothnew:
        _checkcopies(c1, c2, f, base, tca, dirtyc1, limit, both1)
        _checkcopies(c2, c1, f, base, tca, dirtyc2, limit, both2)
    if dirtyc1:
        # incomplete copies may only be found on the "dirty" side for bothnew
        assert not both2['incomplete']
        remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    elif dirtyc2:
        assert not both1['incomplete']
        remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
                                   bothincompletediverge)
    else:
        # incomplete copies and divergences can't happen outside grafts
        assert not both1['incomplete']
        assert not both2['incomplete']
        assert not bothincompletediverge
    for f in remainder:
        assert f not in bothdiverge
        ic = remainder[f]
        if ic[0] in (m1 if dirtyc1 else m2):
            # backed-out rename on one side, but watch out for deleted files
            bothdiverge[f] = ic
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete, {}

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.iteritems():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete, {}

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete, dirmove
629
634
def _heuristicscopytracing(repo, c1, c2, base):
    """ Fast copytracing using filename heuristics

    Only two kinds of moves/renames are assumed to exist:

    1) renames within a single directory (same directory, new filename)
    2) moves across directories (same filename, new directory)

    The "source branch" (the commits from base up to c2, not including
    base) must be free of merge commits; if a merge is found, or if base
    is not an ancestor of c2, this falls back to _fullcopytracing().

    Enabled by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very
    large in number and that will make the algorithm slow. The number of
    possible candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # operate on the committed parents when handed working contexts
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    found = {}

    touched = set()
    m1 = c1.manifest()
    if not repo.revs('%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug("switching to full copytracing as base is not "
                      "an ancestor of c2\n")
        return _fullcopytracing(repo, c1, c2, base)

    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug("switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        touched.update(ctx.files())
        ctx = ctx.p1()

    # keep copies recorded between base and c2 whose source still exists in c1
    for dst, src in _forwardcopies(base, c2).iteritems():
        if src in m1:
            found[dst] = src

    # A file is "missing" when it isn't present in the destination (c1) but
    # is present in both the base (which excludes added files) and the
    # source c2 (which excludes removed files).
    missing = [f for f in touched
               if f not in m1 and f in base and f in c2]

    if missing:
        bybasename = collections.defaultdict(list)
        bydirname = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            bybasename[os.path.basename(f)].append(f)
            bydirname[os.path.dirname(f)].append(f)

        for f in missing:
            candidates = (bybasename[os.path.basename(f)] +
                          bydirname[os.path.dirname(f)])
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            destfctx = c2.filectx(f)
            # we can have a lot of candidates which can slow down the
            # heuristics; config value limits the number of moves to check
            maxcandidates = repo.ui.configint('experimental',
                                              'copytrace.movecandidateslimit')

            if len(candidates) > maxcandidates:
                repo.ui.status(_("skipping copytracing for '%s', more "
                                 "candidates than the limit: %d\n")
                               % (f, len(candidates)))
                continue

            for candidate in candidates:
                srcfctx = c1.filectx(candidate)
                if _related(srcfctx, destfctx):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    found[candidate] = f

    return found, {}, {}, {}, {}
730
735
731 def _related(f1, f2):
736 def _related(f1, f2):
732 """return True if f1 and f2 filectx have a common ancestor
737 """return True if f1 and f2 filectx have a common ancestor
733
738
734 Walk back to common ancestor to see if the two files originate
739 Walk back to common ancestor to see if the two files originate
735 from the same file. Since workingfilectx's rev() is None it messes
740 from the same file. Since workingfilectx's rev() is None it messes
736 up the integer comparison logic, hence the pre-step check for
741 up the integer comparison logic, hence the pre-step check for
737 None (f1 and f2 can only be workingfilectx's initially).
742 None (f1 and f2 can only be workingfilectx's initially).
738 """
743 """
739
744
740 if f1 == f2:
745 if f1 == f2:
741 return f1 # a match
746 return f1 # a match
742
747
743 g1, g2 = f1.ancestors(), f2.ancestors()
748 g1, g2 = f1.ancestors(), f2.ancestors()
744 try:
749 try:
745 f1r, f2r = f1.linkrev(), f2.linkrev()
750 f1r, f2r = f1.linkrev(), f2.linkrev()
746
751
747 if f1r is None:
752 if f1r is None:
748 f1 = next(g1)
753 f1 = next(g1)
749 if f2r is None:
754 if f2r is None:
750 f2 = next(g2)
755 f2 = next(g2)
751
756
752 while True:
757 while True:
753 f1r, f2r = f1.linkrev(), f2.linkrev()
758 f1r, f2r = f1.linkrev(), f2.linkrev()
754 if f1r > f2r:
759 if f1r > f2r:
755 f1 = next(g1)
760 f1 = next(g1)
756 elif f2r > f1r:
761 elif f2r > f1r:
757 f2 = next(g2)
762 f2 = next(g2)
758 else: # f1 and f2 point to files in the same linkrev
763 else: # f1 and f2 point to files in the same linkrev
759 return f1 == f2 # true if they point to the same file
764 return f1 == f2 # true if they point to the same file
760 except StopIteration:
765 except StopIteration:
761 return False
766 return False
762
767
def _checkcopies(srcctx, dstctx, f, base, tca, remotebase, limit, data):
    """
    check possible copies of f from msrc to mdst

    srcctx = starting context for f in msrc
    dstctx = destination context for f in mdst
    f = the filename to check (as in msrc)
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    remotebase = True if base is outside tca::srcctx, False otherwise
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    note: limit is only an optimization, and provides no guarantee that
    irrelevant revisions will not be visited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    msrc = srcctx.manifest()
    mdst = dstctx.manifest()
    mb = base.manifest()
    mta = tca.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = not remotebase and base != tca and f in mb
    getsrcfctx = _makegetfctx(srcctx)
    getdstfctx = _makegetfctx(dstctx)

    if msrc[f] == mb.get(f) and not remotebase:
        # Nothing to merge
        return

    of = None
    seen = {f}
    # walk the filelog ancestry of f, following renames (path changes)
    for oc in getsrcfctx(f, msrc[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in mdst:
            continue # no match, keep looking
        if mdst[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getdstfctx(of, mdst[of])
        # c2 might be a plain new file added on the destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2)
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            elif remotebase: # special case: a <- b <- a -> b "ping-pong" rename
                data['copy'][of] = f
                del data['fullcopy'][f]
                data['fullcopy'][of] = f
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mta:
            if backwards or remotebase:
                # rename is incomplete: only one leg of it is visible here
                data['incomplete'][of] = f
            else:
                for sf in seen:
                    if sf in mb:
                        if tca == base:
                            data['diverge'].setdefault(sf, []).append(f)
                        else:
                            data['incompletediverge'][sf] = [of, f]
                        return
856
861
857 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
862 def duplicatecopies(repo, wctx, rev, fromrev, skiprev=None):
858 """reproduce copies from fromrev to rev in the dirstate
863 """reproduce copies from fromrev to rev in the dirstate
859
864
860 If skiprev is specified, it's a revision that should be used to
865 If skiprev is specified, it's a revision that should be used to
861 filter copy records. Any copies that occur between fromrev and
866 filter copy records. Any copies that occur between fromrev and
862 skiprev will not be duplicated, even if they appear in the set of
867 skiprev will not be duplicated, even if they appear in the set of
863 copies between fromrev and rev.
868 copies between fromrev and rev.
864 """
869 """
865 exclude = {}
870 exclude = {}
866 if (skiprev is not None and
871 if (skiprev is not None and
867 repo.ui.config('experimental', 'copytrace') != 'off'):
872 repo.ui.config('experimental', 'copytrace') != 'off'):
868 # copytrace='off' skips this line, but not the entire function because
873 # copytrace='off' skips this line, but not the entire function because
869 # the line below is O(size of the repo) during a rebase, while the rest
874 # the line below is O(size of the repo) during a rebase, while the rest
870 # of the function is much faster (and is required for carrying copy
875 # of the function is much faster (and is required for carrying copy
871 # metadata across the rebase anyway).
876 # metadata across the rebase anyway).
872 exclude = pathcopies(repo[fromrev], repo[skiprev])
877 exclude = pathcopies(repo[fromrev], repo[skiprev])
873 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
878 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
874 # copies.pathcopies returns backward renames, so dst might not
879 # copies.pathcopies returns backward renames, so dst might not
875 # actually be in the dirstate
880 # actually be in the dirstate
876 if dst in exclude:
881 if dst in exclude:
877 continue
882 continue
878 wctx[dst].markcopied(src)
883 wctx[dst].markcopied(src)
1 NO CONTENT: file was removed
NO CONTENT: file was removed
General Comments 0
You need to be logged in to leave comments. Login now