##// END OF EJS Templates
copies: compute a suitable TCA if base turns out to be unsuitable...
Gábor Stefanik -
r30194:8c69c52c default
parent child Browse files
Show More
@@ -1,592 +1,596 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
def _findlimit(repo, a, b):
    """
    Find the last revision that needs to be checked to ensure that a full
    transitive closure for file copies can be properly calculated.
    Generally, this means finding the earliest revision number that's an
    ancestor of a or b but not both, except when a or b is a direct descendent
    of the other, in which case we can return the minimum revnum of a and b.
    None if no such revision exists.

    'a' and 'b' are revision numbers; either may be None to mean the
    working directory (represented internally by the pseudo-rev len(cl)).
    """

    # basic idea:
    # - mark a and b with different sides
    # - if a parent's children are all on the same side, the parent is
    # on that side, otherwise it is on no side
    # - walk the graph in topological order with the help of a heap;
    # - add unseen parents to side map
    # - clear side of any parent that has children on different sides
    # - track number of interesting revs that might still be on a side
    # - track the lowest interesting rev seen
    # - quit when interesting revs is zero

    cl = repo.changelog
    working = len(cl) # pseudo rev for the working directory
    if a is None:
        a = working
    if b is None:
        b = working

    # side[rev]: -1 = only reachable from a, 1 = only from b, 0 = both
    side = {a: -1, b: 1}
    # revs are pushed negated so the min-heap pops the highest revision
    # first, yielding a descendant-before-ancestor (topological) walk
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = working

    while interesting:
        r = -heapq.heappop(visit)
        if r == working:
            # the working directory's parents come from the dirstate,
            # not the changelog
            parents = [cl.rev(p) for p in repo.dirstate.parents()]
        else:
            parents = cl.parentrevs(r)
        for p in parents:
            if p < 0:
                # nullrev parent: nothing to propagate
                continue
            if p not in side:
                # first time we see p; add it to visit
                side[p] = side[r]
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                # p was interesting but now we know better
                side[p] = 0
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r # lowest rev visited
            interesting -= 1

    if not hascommonancestor:
        return None

    # Consider the following flow (see test-commit-amend.t under issue4405):
    # 1/ File 'a0' committed
    # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
    # 3/ Move back to first commit
    # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
    # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
    #
    # During the amend in step five, we will be in this state:
    #
    # @ 3 temporary amend commit for a1-amend
    # |
    # o 2 a1-amend
    # |
    # | o 1 a1
    # |/
    # o 0 a0
    #
    # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
    # yet the filelog has the copy information in rev 1 and we will not look
    # back far enough unless we also look at the a and b as candidates.
    # This only occurs when a is a descendent of b or visa-versa.
    return min(limit, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
127 def _tracefile(fctx, am, limit=-1):
127 def _tracefile(fctx, am, limit=-1):
128 '''return file context that is the ancestor of fctx present in ancestor
128 '''return file context that is the ancestor of fctx present in ancestor
129 manifest am, stopping after the first ancestor lower than limit'''
129 manifest am, stopping after the first ancestor lower than limit'''
130
130
131 for f in fctx.ancestors():
131 for f in fctx.ancestors():
132 if am.get(f.path(), None) == f.filenode():
132 if am.get(f.path(), None) == f.filenode():
133 return f
133 return f
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 return None
135 return None
136
136
137 def _dirstatecopies(d):
137 def _dirstatecopies(d):
138 ds = d._repo.dirstate
138 ds = d._repo.dirstate
139 c = ds.copies().copy()
139 c = ds.copies().copy()
140 for k in c.keys():
140 for k in c.keys():
141 if ds[k] not in 'anm':
141 if ds[k] not in 'anm':
142 del c[k]
142 del c[k]
143 return c
143 return c
144
144
145 def _computeforwardmissing(a, b, match=None):
145 def _computeforwardmissing(a, b, match=None):
146 """Computes which files are in b but not a.
146 """Computes which files are in b but not a.
147 This is its own function so extensions can easily wrap this call to see what
147 This is its own function so extensions can easily wrap this call to see what
148 files _forwardcopies is about to process.
148 files _forwardcopies is about to process.
149 """
149 """
150 ma = a.manifest()
150 ma = a.manifest()
151 mb = b.manifest()
151 mb = b.manifest()
152 if match:
152 if match:
153 ma = ma.matches(match)
153 ma = ma.matches(match)
154 mb = mb.matches(match)
154 mb = mb.matches(match)
155 return mb.filesnotin(ma)
155 return mb.filesnotin(ma)
156
156
def _forwardcopies(a, b, match=None):
    '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''

    # working copy: trace against b's first parent and fold the dirstate
    # copies back in at the end
    wctx = None
    if b.rev() is None:
        wctx = b
        b = wctx.p1()
        if a == b:
            # short-circuit to avoid issues with merge states
            return _dirstatecopies(wctx)

    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    limit = _findlimit(a._repo, a.rev(), b.rev())
    if limit is None:
        limit = -1
    am = a.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests,
    # since it compares the entire manifests. We can optimize it in the common
    # use case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if not match and b.p1() == a and b.p2().node() == node.nullid:
        forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # share one ancestry context across all traced files so linkrev
    # adjustment stays cheap (see _makegetfctx for the rationale)
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
    for f in missing:
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext
        ofctx = _tracefile(fctx, am, limit)
        if ofctx:
            cm[f] = ofctx.path()

    # combine copies from dirstate if necessary
    if wctx is not None:
        cm = _chain(a, wctx, cm, _dirstatecopies(wctx))

    return cm
205
205
def _backwardrenames(a, b):
    """Return the rename map {src@b: dst@a} for the backward direction.

    Computed by inverting the forward copy map from b to a; entries whose
    source still exists in 'a' (i.e. copies rather than renames) are
    dropped.
    """
    if a._repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    f = _forwardcopies(b, a)
    r = {}
    # items() instead of iteritems(): Python 3 compatible, and sorted()
    # materializes the pairs anyway; sorting keeps the arbitrary 1:n pick
    # deterministic
    for k, v in sorted(f.items()):
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r
221
221
def pathcopies(x, y, match=None):
    '''find {dst@y: src@x} copy mapping for directed compare'''
    # degenerate cases: identical or missing endpoints carry no copies
    if x == y or not x or not y:
        return {}
    anc = y.ancestor(x)
    if anc == x:
        # x is an ancestor of y: a plain forward walk suffices
        return _forwardcopies(x, y, match=match)
    if anc == y:
        # y is an ancestor of x: a plain backward walk suffices
        return _backwardrenames(x, y)
    # otherwise walk back from x to the common ancestor, then forward to y
    return _chain(x, y, _backwardrenames(x, anc),
                  _forwardcopies(anc, y, match=match))
233
233
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 and c2. This is its own function so extensions can easily wrap this call
236 and c2. This is its own function so extensions can easily wrap this call
237 to see what files mergecopies is about to process.
237 to see what files mergecopies is about to process.
238
238
239 Even though c1 and c2 are not used in this function, they are useful in
239 Even though c1 and c2 are not used in this function, they are useful in
240 other extensions for being able to read the file nodes of the changed files.
240 other extensions for being able to read the file nodes of the changed files.
241 """
241 """
242 u1 = sorted(addedinm1 - addedinm2)
242 u1 = sorted(addedinm1 - addedinm2)
243 u2 = sorted(addedinm2 - addedinm1)
243 u2 = sorted(addedinm2 - addedinm1)
244
244
245 if u1:
245 if u1:
246 repo.ui.debug(" unmatched files in local:\n %s\n"
246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 % "\n ".join(u1))
247 % "\n ".join(u1))
248 if u2:
248 if u2:
249 repo.ui.debug(" unmatched files in other:\n %s\n"
249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 % "\n ".join(u2))
250 % "\n ".join(u2))
251 return u1, u2
251 return u1, u2
252
252
def _makegetfctx(ctx):
    """return a 'getfctx' function suitable for _checkcopies usage

    We have to re-setup the function building 'filectx' for each
    '_checkcopies' to ensure the linkrev adjustment is properly setup for
    each. Linkrev adjustment is important to avoid bug in rename
    detection. Moreover, having a proper '_ancestrycontext' setup ensures
    the performance impact of this adjustment is kept limited. Without it,
    each file could do a full dag traversal making the time complexity of
    the operation explode (see issue4537).

    This function exists here mostly to limit the impact on stable. Feel
    free to refactor on default.
    """
    rev = ctx.rev()
    repo = ctx._repo
    ac = getattr(ctx, '_ancestrycontext', None)
    if ac is None:
        # compute the ancestry context once and cache it on ctx so every
        # file traced from this ctx shares it
        revs = [rev]
        if rev is None:
            # working context has no rev; use its parents' revs instead
            revs = [p.rev() for p in ctx.parents()]
        ac = repo.changelog.ancestors(revs, inclusive=True)
        ctx._ancestrycontext = ac
    def makectx(f, n):
        # 'n' is a filenode; anything that isn't a full 20-byte binary
        # node is taken to refer to the working context
        if len(n) != 20: # in a working context?
            if ctx.rev() is None:
                return ctx.filectx(f)
            return repo[None][f]
        fctx = repo.filectx(f, fileid=n)
        # setup only needed for filectx not create from a changectx
        fctx._ancestrycontext = ac
        fctx._descendantrev = rev
        return fctx
    # memoize: the same (f, n) pair is frequently requested repeatedly
    return util.lrucachefunc(makectx)
287
287
def mergecopies(repo, c1, c2, base):
    """
    Find moves and copies between context c1 and c2 that are relevant
    for merging. 'base' will be used as the merge base.

    Returns four dicts: "copy", "movewithdir", "diverge", and
    "renamedelete".

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return {}, {}, {}, {}

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return repo.dirstate.copies(), {}, {}, {}

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if repo.ui.configbool('experimental', 'disablecopytrace'):
        return {}, {}, {}, {}

    # In certain scenarios (e.g. graft, update or rebase), base can be
    # overridden We still need to know a real common ancestor in this case We
    # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
    # can be multiple common ancestors, e.g. in case of bidmerge. Because our
    # caller may not know if the revision passed in lieu of the CA is a genuine
    # common ancestor or not without explicitly checking it, it's better to
    # determine that here.
    #
    # base.descendant(wc) and base.descendant(base) are False, work around that
    _c1 = c1.p1() if c1.rev() is None else c1
    _c2 = c2.p1() if c2.rev() is None else c2
    # an endpoint is "dirty" if it isn't a descendant of the merge base
    # if we have a dirty endpoint, we need to trigger graft logic, and also
    # keep track of which endpoint is dirty
    dirtyc1 = not (base == _c1 or base.descendant(_c1))
    dirtyc2 = not (base == _c2 or base.descendant(_c2))
    graft = dirtyc1 or dirtyc2
    # NOTE(review): tca (topological common ancestor) is computed but not
    # yet consumed below -- presumably groundwork for graft-aware copy
    # tracing; confirm before removing
    tca = base
    if graft:
        tca = _c1.ancestor(_c2)

    limit = _findlimit(repo, c1.rev(), c2.rev())
    if limit is None:
        # no common ancestor, no copies
        return {}, {}, {}, {}
    repo.ui.debug(" searching for copies back to rev %d\n" % limit)

    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    # gather data from _checkcopies:
    # - diverge = record all diverges in this dict
    # - copy = record all non-divergent copies in this dict
    # - fullcopy = record all copies in this dict
    diverge = {} # divergence data is shared
    data1 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }
    data2 = {'copy': {},
             'fullcopy': {},
             'diverge': diverge,
            }

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb)
    addedinm2 = m2.filesnotin(mb)
    u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
    u1u, u2u = u1r, u2r
    bothnew = sorted(addedinm1 & addedinm2)

    for f in u1u:
        _checkcopies(c1, f, m1, m2, base, limit, data1)

    for f in u2u:
        _checkcopies(c2, f, m2, m1, base, limit, data2)

    # merge the two sides' results; dict view concatenation
    # (dict(a.items() + b.items())) is Python-2-only, update() works on both
    copy = dict(data1['copy'])
    copy.update(data2['copy'])
    fullcopy = dict(data1['fullcopy'])
    fullcopy.update(data2['fullcopy'])

    renamedelete = {}
    renamedeleteset = set()
    divergeset = set()
    # iterate over a snapshot: entries are deleted from 'diverge' inside
    # the loop, which would raise RuntimeError on Python 3
    for of, fl in list(diverge.items()):
        if len(fl) == 1 or of in c1 or of in c2:
            del diverge[of] # not actually divergent, or not a rename
            if of not in c1 and of not in c2:
                # renamed on one side, deleted on the other side, but filter
                # out files that have been renamed and then deleted
                renamedelete[of] = [f for f in fl if f in c1 or f in c2]
                renamedeleteset.update(fl) # reverse map for below
        else:
            divergeset.update(fl) # reverse map for below

    if bothnew:
        repo.ui.debug(" unmatched files new in both:\n %s\n"
                      % "\n ".join(bothnew))
    bothdiverge = {}
    bothdata = {'copy': {},
                'fullcopy': {},
                'diverge': bothdiverge,
               }
    for f in bothnew:
        _checkcopies(c1, f, m1, m2, base, limit, bothdata)
        _checkcopies(c2, f, m2, m1, base, limit, bothdata)
    for of, fl in bothdiverge.items():
        if len(fl) == 2 and fl[0] == fl[1]:
            copy[fl[0]] = of # not actually divergent, just matching renames

    if fullcopy and repo.ui.debugflag:
        repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
                      "% = renamed and deleted):\n")
        for f in sorted(fullcopy):
            note = ""
            if f in copy:
                note += "*"
            if f in divergeset:
                note += "!"
            if f in renamedeleteset:
                note += "%"
            repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
                                                            note))
    del divergeset

    if not fullcopy:
        return copy, {}, diverge, renamedelete

    repo.ui.debug(" checking for directory renames\n")

    # generate a directory move map
    d1, d2 = c1.dirs(), c2.dirs()
    # Hack for adding '', which is not otherwise added, to d1 and d2
    d1.addpath('/')
    d2.addpath('/')
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in fullcopy.items():
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d1 and ddst in d1:
            # directory wasn't entirely moved locally
            invalid.add(dsrc + "/")
        elif dsrc in d2 and ddst in d2:
            # directory wasn't entirely moved remotely
            invalid.add(dsrc + "/")
        elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
            # files from the same directory moved to two different places
            invalid.add(dsrc + "/")
        else:
            # looks good so far
            dirmove[dsrc + "/"] = ddst + "/"

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d1, d2, invalid

    if not dirmove:
        return copy, {}, diverge, renamedelete

    for d in dirmove:
        repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
                      (d, dirmove[d]))

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in u1r + u2r:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d):]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug((" pending file src: '%s' -> "
                                       "dst: '%s'\n") % (f, df))
                    break

    return copy, movewithdir, diverge, renamedelete
485
489
486 def _related(f1, f2, limit):
490 def _related(f1, f2, limit):
487 """return True if f1 and f2 filectx have a common ancestor
491 """return True if f1 and f2 filectx have a common ancestor
488
492
489 Walk back to common ancestor to see if the two files originate
493 Walk back to common ancestor to see if the two files originate
490 from the same file. Since workingfilectx's rev() is None it messes
494 from the same file. Since workingfilectx's rev() is None it messes
491 up the integer comparison logic, hence the pre-step check for
495 up the integer comparison logic, hence the pre-step check for
492 None (f1 and f2 can only be workingfilectx's initially).
496 None (f1 and f2 can only be workingfilectx's initially).
493 """
497 """
494
498
495 if f1 == f2:
499 if f1 == f2:
496 return f1 # a match
500 return f1 # a match
497
501
498 g1, g2 = f1.ancestors(), f2.ancestors()
502 g1, g2 = f1.ancestors(), f2.ancestors()
499 try:
503 try:
500 f1r, f2r = f1.linkrev(), f2.linkrev()
504 f1r, f2r = f1.linkrev(), f2.linkrev()
501
505
502 if f1r is None:
506 if f1r is None:
503 f1 = next(g1)
507 f1 = next(g1)
504 if f2r is None:
508 if f2r is None:
505 f2 = next(g2)
509 f2 = next(g2)
506
510
507 while True:
511 while True:
508 f1r, f2r = f1.linkrev(), f2.linkrev()
512 f1r, f2r = f1.linkrev(), f2.linkrev()
509 if f1r > f2r:
513 if f1r > f2r:
510 f1 = next(g1)
514 f1 = next(g1)
511 elif f2r > f1r:
515 elif f2r > f1r:
512 f2 = next(g2)
516 f2 = next(g2)
513 elif f1 == f2:
517 elif f1 == f2:
514 return f1 # a match
518 return f1 # a match
515 elif f1r == f2r or f1r < limit or f2r < limit:
519 elif f1r == f2r or f1r < limit or f2r < limit:
516 return False # copy no longer relevant
520 return False # copy no longer relevant
517 except StopIteration:
521 except StopIteration:
518 return False
522 return False
519
523
520 def _checkcopies(ctx, f, m1, m2, base, limit, data):
524 def _checkcopies(ctx, f, m1, m2, base, limit, data):
521 """
525 """
522 check possible copies of f from m1 to m2
526 check possible copies of f from m1 to m2
523
527
524 ctx = starting context for f in m1
528 ctx = starting context for f in m1
525 f = the filename to check (as in m1)
529 f = the filename to check (as in m1)
526 m1 = the source manifest
530 m1 = the source manifest
527 m2 = the destination manifest
531 m2 = the destination manifest
528 base = the changectx used as a merge base
532 base = the changectx used as a merge base
529 limit = the rev number to not search beyond
533 limit = the rev number to not search beyond
530 data = dictionary of dictionary to store copy data. (see mergecopies)
534 data = dictionary of dictionary to store copy data. (see mergecopies)
531
535
532 note: limit is only an optimization, and there is no guarantee that
536 note: limit is only an optimization, and there is no guarantee that
533 irrelevant revisions will not be limited
537 irrelevant revisions will not be limited
534 there is no easy way to make this algorithm stop in a guaranteed way
538 there is no easy way to make this algorithm stop in a guaranteed way
535 once it "goes behind a certain revision".
539 once it "goes behind a certain revision".
536 """
540 """
537
541
538 mb = base.manifest()
542 mb = base.manifest()
539 getfctx = _makegetfctx(ctx)
543 getfctx = _makegetfctx(ctx)
540
544
541 of = None
545 of = None
542 seen = set([f])
546 seen = set([f])
543 for oc in getfctx(f, m1[f]).ancestors():
547 for oc in getfctx(f, m1[f]).ancestors():
544 ocr = oc.linkrev()
548 ocr = oc.linkrev()
545 of = oc.path()
549 of = oc.path()
546 if of in seen:
550 if of in seen:
547 # check limit late - grab last rename before
551 # check limit late - grab last rename before
548 if ocr < limit:
552 if ocr < limit:
549 break
553 break
550 continue
554 continue
551 seen.add(of)
555 seen.add(of)
552
556
553 data['fullcopy'][f] = of # remember for dir rename detection
557 data['fullcopy'][f] = of # remember for dir rename detection
554 if of not in m2:
558 if of not in m2:
555 continue # no match, keep looking
559 continue # no match, keep looking
556 if m2[of] == mb.get(of):
560 if m2[of] == mb.get(of):
557 return # no merge needed, quit early
561 return # no merge needed, quit early
558 c2 = getfctx(of, m2[of])
562 c2 = getfctx(of, m2[of])
559 # c2 might be a plain new file on added on destination side that is
563 # c2 might be a plain new file on added on destination side that is
560 # unrelated to the droids we are looking for.
564 # unrelated to the droids we are looking for.
561 cr = _related(oc, c2, base.rev())
565 cr = _related(oc, c2, base.rev())
562 if cr and (of == f or of == c2.path()): # non-divergent
566 if cr and (of == f or of == c2.path()): # non-divergent
563 if of in mb:
567 if of in mb:
564 data['copy'][f] = of
568 data['copy'][f] = of
565 return
569 return
566
570
567 if of in mb:
571 if of in mb:
568 data['diverge'].setdefault(of, []).append(f)
572 data['diverge'].setdefault(of, []).append(f)
569
573
570 def duplicatecopies(repo, rev, fromrev, skiprev=None):
574 def duplicatecopies(repo, rev, fromrev, skiprev=None):
571 '''reproduce copies from fromrev to rev in the dirstate
575 '''reproduce copies from fromrev to rev in the dirstate
572
576
573 If skiprev is specified, it's a revision that should be used to
577 If skiprev is specified, it's a revision that should be used to
574 filter copy records. Any copies that occur between fromrev and
578 filter copy records. Any copies that occur between fromrev and
575 skiprev will not be duplicated, even if they appear in the set of
579 skiprev will not be duplicated, even if they appear in the set of
576 copies between fromrev and rev.
580 copies between fromrev and rev.
577 '''
581 '''
578 exclude = {}
582 exclude = {}
579 if (skiprev is not None and
583 if (skiprev is not None and
580 not repo.ui.configbool('experimental', 'disablecopytrace')):
584 not repo.ui.configbool('experimental', 'disablecopytrace')):
581 # disablecopytrace skips this line, but not the entire function because
585 # disablecopytrace skips this line, but not the entire function because
582 # the line below is O(size of the repo) during a rebase, while the rest
586 # the line below is O(size of the repo) during a rebase, while the rest
583 # of the function is much faster (and is required for carrying copy
587 # of the function is much faster (and is required for carrying copy
584 # metadata across the rebase anyway).
588 # metadata across the rebase anyway).
585 exclude = pathcopies(repo[fromrev], repo[skiprev])
589 exclude = pathcopies(repo[fromrev], repo[skiprev])
586 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
590 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
587 # copies.pathcopies returns backward renames, so dst might not
591 # copies.pathcopies returns backward renames, so dst might not
588 # actually be in the dirstate
592 # actually be in the dirstate
589 if dst in exclude:
593 if dst in exclude:
590 continue
594 continue
591 if repo.dirstate[dst] in "nma":
595 if repo.dirstate[dst] in "nma":
592 repo.dirstate.copy(src, dst)
596 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now