##// END OF EJS Templates
mergecopies: add logic to process incomplete data...
Gábor Stefanik -
r30202:a005c33d default
parent child Browse files
Show More
@@ -1,635 +1,688 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
19 def _findlimit(repo, a, b):
19 def _findlimit(repo, a, b):
20 """
20 """
21 Find the last revision that needs to be checked to ensure that a full
21 Find the last revision that needs to be checked to ensure that a full
22 transitive closure for file copies can be properly calculated.
22 transitive closure for file copies can be properly calculated.
23 Generally, this means finding the earliest revision number that's an
23 Generally, this means finding the earliest revision number that's an
 24 ancestor of a or b but not both, except when a or b is a direct descendant
 24 ancestor of a or b but not both, except when a or b is a direct descendant
25 of the other, in which case we can return the minimum revnum of a and b.
25 of the other, in which case we can return the minimum revnum of a and b.
26 None if no such revision exists.
26 None if no such revision exists.
27 """
27 """
28
28
29 # basic idea:
29 # basic idea:
30 # - mark a and b with different sides
30 # - mark a and b with different sides
31 # - if a parent's children are all on the same side, the parent is
31 # - if a parent's children are all on the same side, the parent is
32 # on that side, otherwise it is on no side
32 # on that side, otherwise it is on no side
33 # - walk the graph in topological order with the help of a heap;
33 # - walk the graph in topological order with the help of a heap;
34 # - add unseen parents to side map
34 # - add unseen parents to side map
35 # - clear side of any parent that has children on different sides
35 # - clear side of any parent that has children on different sides
36 # - track number of interesting revs that might still be on a side
36 # - track number of interesting revs that might still be on a side
37 # - track the lowest interesting rev seen
37 # - track the lowest interesting rev seen
38 # - quit when interesting revs is zero
38 # - quit when interesting revs is zero
39
39
40 cl = repo.changelog
40 cl = repo.changelog
41 working = len(cl) # pseudo rev for the working directory
41 working = len(cl) # pseudo rev for the working directory
42 if a is None:
42 if a is None:
43 a = working
43 a = working
44 if b is None:
44 if b is None:
45 b = working
45 b = working
46
46
47 side = {a: -1, b: 1}
47 side = {a: -1, b: 1}
48 visit = [-a, -b]
48 visit = [-a, -b]
49 heapq.heapify(visit)
49 heapq.heapify(visit)
50 interesting = len(visit)
50 interesting = len(visit)
51 hascommonancestor = False
51 hascommonancestor = False
52 limit = working
52 limit = working
53
53
54 while interesting:
54 while interesting:
55 r = -heapq.heappop(visit)
55 r = -heapq.heappop(visit)
56 if r == working:
56 if r == working:
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
58 else:
58 else:
59 parents = cl.parentrevs(r)
59 parents = cl.parentrevs(r)
60 for p in parents:
60 for p in parents:
61 if p < 0:
61 if p < 0:
62 continue
62 continue
63 if p not in side:
63 if p not in side:
64 # first time we see p; add it to visit
64 # first time we see p; add it to visit
65 side[p] = side[r]
65 side[p] = side[r]
66 if side[p]:
66 if side[p]:
67 interesting += 1
67 interesting += 1
68 heapq.heappush(visit, -p)
68 heapq.heappush(visit, -p)
69 elif side[p] and side[p] != side[r]:
69 elif side[p] and side[p] != side[r]:
70 # p was interesting but now we know better
70 # p was interesting but now we know better
71 side[p] = 0
71 side[p] = 0
72 interesting -= 1
72 interesting -= 1
73 hascommonancestor = True
73 hascommonancestor = True
74 if side[r]:
74 if side[r]:
75 limit = r # lowest rev visited
75 limit = r # lowest rev visited
76 interesting -= 1
76 interesting -= 1
77
77
78 if not hascommonancestor:
78 if not hascommonancestor:
79 return None
79 return None
80
80
81 # Consider the following flow (see test-commit-amend.t under issue4405):
81 # Consider the following flow (see test-commit-amend.t under issue4405):
82 # 1/ File 'a0' committed
82 # 1/ File 'a0' committed
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
84 # 3/ Move back to first commit
84 # 3/ Move back to first commit
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
87 #
87 #
88 # During the amend in step five, we will be in this state:
88 # During the amend in step five, we will be in this state:
89 #
89 #
90 # @ 3 temporary amend commit for a1-amend
90 # @ 3 temporary amend commit for a1-amend
91 # |
91 # |
92 # o 2 a1-amend
92 # o 2 a1-amend
93 # |
93 # |
94 # | o 1 a1
94 # | o 1 a1
95 # |/
95 # |/
96 # o 0 a0
96 # o 0 a0
97 #
97 #
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
99 # yet the filelog has the copy information in rev 1 and we will not look
99 # yet the filelog has the copy information in rev 1 and we will not look
100 # back far enough unless we also look at the a and b as candidates.
100 # back far enough unless we also look at the a and b as candidates.
101 # This only occurs when a is a descendant of b or vice versa.
101 # This only occurs when a is a descendant of b or vice versa.
102 return min(limit, a, b)
102 return min(limit, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
127 def _tracefile(fctx, am, limit=-1):
127 def _tracefile(fctx, am, limit=-1):
128 '''return file context that is the ancestor of fctx present in ancestor
128 '''return file context that is the ancestor of fctx present in ancestor
129 manifest am, stopping after the first ancestor lower than limit'''
129 manifest am, stopping after the first ancestor lower than limit'''
130
130
131 for f in fctx.ancestors():
131 for f in fctx.ancestors():
132 if am.get(f.path(), None) == f.filenode():
132 if am.get(f.path(), None) == f.filenode():
133 return f
133 return f
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 return None
135 return None
136
136
137 def _dirstatecopies(d):
137 def _dirstatecopies(d):
138 ds = d._repo.dirstate
138 ds = d._repo.dirstate
139 c = ds.copies().copy()
139 c = ds.copies().copy()
140 for k in c.keys():
140 for k in c.keys():
141 if ds[k] not in 'anm':
141 if ds[k] not in 'anm':
142 del c[k]
142 del c[k]
143 return c
143 return c
144
144
145 def _computeforwardmissing(a, b, match=None):
145 def _computeforwardmissing(a, b, match=None):
146 """Computes which files are in b but not a.
146 """Computes which files are in b but not a.
147 This is its own function so extensions can easily wrap this call to see what
147 This is its own function so extensions can easily wrap this call to see what
148 files _forwardcopies is about to process.
148 files _forwardcopies is about to process.
149 """
149 """
150 ma = a.manifest()
150 ma = a.manifest()
151 mb = b.manifest()
151 mb = b.manifest()
152 if match:
152 if match:
153 ma = ma.matches(match)
153 ma = ma.matches(match)
154 mb = mb.matches(match)
154 mb = mb.matches(match)
155 return mb.filesnotin(ma)
155 return mb.filesnotin(ma)
156
156
157 def _forwardcopies(a, b, match=None):
157 def _forwardcopies(a, b, match=None):
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159
159
160 # check for working copy
160 # check for working copy
161 w = None
161 w = None
162 if b.rev() is None:
162 if b.rev() is None:
163 w = b
163 w = b
164 b = w.p1()
164 b = w.p1()
165 if a == b:
165 if a == b:
166 # short-circuit to avoid issues with merge states
166 # short-circuit to avoid issues with merge states
167 return _dirstatecopies(w)
167 return _dirstatecopies(w)
168
168
169 # files might have to be traced back to the fctx parent of the last
169 # files might have to be traced back to the fctx parent of the last
170 # one-side-only changeset, but not further back than that
170 # one-side-only changeset, but not further back than that
171 limit = _findlimit(a._repo, a.rev(), b.rev())
171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 if limit is None:
172 if limit is None:
173 limit = -1
173 limit = -1
174 am = a.manifest()
174 am = a.manifest()
175
175
176 # find where new files came from
176 # find where new files came from
177 # we currently don't try to find where old files went, too expensive
177 # we currently don't try to find where old files went, too expensive
178 # this means we can miss a case like 'hg rm b; hg cp a b'
178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 cm = {}
179 cm = {}
180
180
181 # Computing the forward missing is quite expensive on large manifests, since
181 # Computing the forward missing is quite expensive on large manifests, since
182 # it compares the entire manifests. We can optimize it in the common use
182 # it compares the entire manifests. We can optimize it in the common use
183 # case of computing what copies are in a commit versus its parent (like
183 # case of computing what copies are in a commit versus its parent (like
184 # during a rebase or histedit). Note, we exclude merge commits from this
184 # during a rebase or histedit). Note, we exclude merge commits from this
185 # optimization, since the ctx.files() for a merge commit is not correct for
185 # optimization, since the ctx.files() for a merge commit is not correct for
186 # this comparison.
186 # this comparison.
187 forwardmissingmatch = match
187 forwardmissingmatch = match
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191
191
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 for f in missing:
193 for f in missing:
194 fctx = b[f]
194 fctx = b[f]
195 fctx._ancestrycontext = ancestrycontext
195 fctx._ancestrycontext = ancestrycontext
196 ofctx = _tracefile(fctx, am, limit)
196 ofctx = _tracefile(fctx, am, limit)
197 if ofctx:
197 if ofctx:
198 cm[f] = ofctx.path()
198 cm[f] = ofctx.path()
199
199
200 # combine copies from dirstate if necessary
200 # combine copies from dirstate if necessary
201 if w is not None:
201 if w is not None:
202 cm = _chain(a, w, cm, _dirstatecopies(w))
202 cm = _chain(a, w, cm, _dirstatecopies(w))
203
203
204 return cm
204 return cm
205
205
206 def _backwardrenames(a, b):
206 def _backwardrenames(a, b):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
208 return {}
208 return {}
209
209
210 # Even though we're not taking copies into account, 1:n rename situations
210 # Even though we're not taking copies into account, 1:n rename situations
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 # arbitrarily pick one of the renames.
212 # arbitrarily pick one of the renames.
213 f = _forwardcopies(b, a)
213 f = _forwardcopies(b, a)
214 r = {}
214 r = {}
215 for k, v in sorted(f.iteritems()):
215 for k, v in sorted(f.iteritems()):
216 # remove copies
216 # remove copies
217 if v in a:
217 if v in a:
218 continue
218 continue
219 r[v] = k
219 r[v] = k
220 return r
220 return r
221
221
222 def pathcopies(x, y, match=None):
222 def pathcopies(x, y, match=None):
223 '''find {dst@y: src@x} copy mapping for directed compare'''
223 '''find {dst@y: src@x} copy mapping for directed compare'''
224 if x == y or not x or not y:
224 if x == y or not x or not y:
225 return {}
225 return {}
226 a = y.ancestor(x)
226 a = y.ancestor(x)
227 if a == x:
227 if a == x:
228 return _forwardcopies(x, y, match=match)
228 return _forwardcopies(x, y, match=match)
229 if a == y:
229 if a == y:
230 return _backwardrenames(x, y)
230 return _backwardrenames(x, y)
231 return _chain(x, y, _backwardrenames(x, a),
231 return _chain(x, y, _backwardrenames(x, a),
232 _forwardcopies(a, y, match=match))
232 _forwardcopies(a, y, match=match))
233
233
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2, baselabel=''):
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 and c2. This is its own function so extensions can easily wrap this call
236 and c2. This is its own function so extensions can easily wrap this call
237 to see what files mergecopies is about to process.
237 to see what files mergecopies is about to process.
238
238
239 Even though c1 and c2 are not used in this function, they are useful in
239 Even though c1 and c2 are not used in this function, they are useful in
240 other extensions for being able to read the file nodes of the changed files.
240 other extensions for being able to read the file nodes of the changed files.
241
241
242 "baselabel" can be passed to help distinguish the multiple computations
242 "baselabel" can be passed to help distinguish the multiple computations
243 done in the graft case.
243 done in the graft case.
244 """
244 """
245 u1 = sorted(addedinm1 - addedinm2)
245 u1 = sorted(addedinm1 - addedinm2)
246 u2 = sorted(addedinm2 - addedinm1)
246 u2 = sorted(addedinm2 - addedinm1)
247
247
248 header = " unmatched files in %s"
248 header = " unmatched files in %s"
249 if baselabel:
249 if baselabel:
250 header += ' (from %s)' % baselabel
250 header += ' (from %s)' % baselabel
251 if u1:
251 if u1:
252 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
252 repo.ui.debug("%s:\n %s\n" % (header % 'local', "\n ".join(u1)))
253 if u2:
253 if u2:
254 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
254 repo.ui.debug("%s:\n %s\n" % (header % 'other', "\n ".join(u2)))
255 return u1, u2
255 return u1, u2
256
256
257 def _makegetfctx(ctx):
257 def _makegetfctx(ctx):
258 """return a 'getfctx' function suitable for _checkcopies usage
258 """return a 'getfctx' function suitable for _checkcopies usage
259
259
260 We have to re-setup the function building 'filectx' for each
260 We have to re-setup the function building 'filectx' for each
261 '_checkcopies' to ensure the linkrev adjustment is properly setup for
261 '_checkcopies' to ensure the linkrev adjustment is properly setup for
262 each. Linkrev adjustment is important to avoid bug in rename
262 each. Linkrev adjustment is important to avoid bug in rename
263 detection. Moreover, having a proper '_ancestrycontext' setup ensures
263 detection. Moreover, having a proper '_ancestrycontext' setup ensures
264 the performance impact of this adjustment is kept limited. Without it,
264 the performance impact of this adjustment is kept limited. Without it,
265 each file could do a full dag traversal making the time complexity of
265 each file could do a full dag traversal making the time complexity of
266 the operation explode (see issue4537).
266 the operation explode (see issue4537).
267
267
268 This function exists here mostly to limit the impact on stable. Feel
268 This function exists here mostly to limit the impact on stable. Feel
269 free to refactor on default.
269 free to refactor on default.
270 """
270 """
271 rev = ctx.rev()
271 rev = ctx.rev()
272 repo = ctx._repo
272 repo = ctx._repo
273 ac = getattr(ctx, '_ancestrycontext', None)
273 ac = getattr(ctx, '_ancestrycontext', None)
274 if ac is None:
274 if ac is None:
275 revs = [rev]
275 revs = [rev]
276 if rev is None:
276 if rev is None:
277 revs = [p.rev() for p in ctx.parents()]
277 revs = [p.rev() for p in ctx.parents()]
278 ac = repo.changelog.ancestors(revs, inclusive=True)
278 ac = repo.changelog.ancestors(revs, inclusive=True)
279 ctx._ancestrycontext = ac
279 ctx._ancestrycontext = ac
280 def makectx(f, n):
280 def makectx(f, n):
281 if len(n) != 20: # in a working context?
281 if len(n) != 20: # in a working context?
282 if ctx.rev() is None:
282 if ctx.rev() is None:
283 return ctx.filectx(f)
283 return ctx.filectx(f)
284 return repo[None][f]
284 return repo[None][f]
285 fctx = repo.filectx(f, fileid=n)
285 fctx = repo.filectx(f, fileid=n)
286 # setup only needed for filectx not create from a changectx
286 # setup only needed for filectx not create from a changectx
287 fctx._ancestrycontext = ac
287 fctx._ancestrycontext = ac
288 fctx._descendantrev = rev
288 fctx._descendantrev = rev
289 return fctx
289 return fctx
290 return util.lrucachefunc(makectx)
290 return util.lrucachefunc(makectx)
291
291
292 def _combinecopies(copyfrom, copyto, finalcopy, diverge, incompletediverge):
293 """combine partial copy paths"""
294 remainder = {}
295 for f in copyfrom:
296 if f in copyto:
297 finalcopy[copyto[f]] = copyfrom[f]
298 del copyto[f]
299 for f in incompletediverge:
300 assert f not in diverge
301 ic = incompletediverge[f]
302 if ic[0] in copyto:
303 diverge[f] = [copyto[ic[0]], ic[1]]
304 else:
305 remainder[f] = ic
306 return remainder
307
292 def mergecopies(repo, c1, c2, base):
308 def mergecopies(repo, c1, c2, base):
293 """
309 """
294 Find moves and copies between context c1 and c2 that are relevant
310 Find moves and copies between context c1 and c2 that are relevant
295 for merging. 'base' will be used as the merge base.
311 for merging. 'base' will be used as the merge base.
296
312
297 Returns four dicts: "copy", "movewithdir", "diverge", and
313 Returns four dicts: "copy", "movewithdir", "diverge", and
298 "renamedelete".
314 "renamedelete".
299
315
300 "copy" is a mapping from destination name -> source name,
316 "copy" is a mapping from destination name -> source name,
301 where source is in c1 and destination is in c2 or vice-versa.
317 where source is in c1 and destination is in c2 or vice-versa.
302
318
303 "movewithdir" is a mapping from source name -> destination name,
319 "movewithdir" is a mapping from source name -> destination name,
304 where the file at source present in one context but not the other
320 where the file at source present in one context but not the other
305 needs to be moved to destination by the merge process, because the
321 needs to be moved to destination by the merge process, because the
306 other context moved the directory it is in.
322 other context moved the directory it is in.
307
323
308 "diverge" is a mapping of source name -> list of destination names
324 "diverge" is a mapping of source name -> list of destination names
309 for divergent renames.
325 for divergent renames.
310
326
311 "renamedelete" is a mapping of source name -> list of destination
327 "renamedelete" is a mapping of source name -> list of destination
312 names for files deleted in c1 that were renamed in c2 or vice-versa.
328 names for files deleted in c1 that were renamed in c2 or vice-versa.
313 """
329 """
314 # avoid silly behavior for update from empty dir
330 # avoid silly behavior for update from empty dir
315 if not c1 or not c2 or c1 == c2:
331 if not c1 or not c2 or c1 == c2:
316 return {}, {}, {}, {}
332 return {}, {}, {}, {}
317
333
318 # avoid silly behavior for parent -> working dir
334 # avoid silly behavior for parent -> working dir
319 if c2.node() is None and c1.node() == repo.dirstate.p1():
335 if c2.node() is None and c1.node() == repo.dirstate.p1():
320 return repo.dirstate.copies(), {}, {}, {}
336 return repo.dirstate.copies(), {}, {}, {}
321
337
322 # Copy trace disabling is explicitly below the node == p1 logic above
338 # Copy trace disabling is explicitly below the node == p1 logic above
323 # because the logic above is required for a simple copy to be kept across a
339 # because the logic above is required for a simple copy to be kept across a
324 # rebase.
340 # rebase.
325 if repo.ui.configbool('experimental', 'disablecopytrace'):
341 if repo.ui.configbool('experimental', 'disablecopytrace'):
326 return {}, {}, {}, {}
342 return {}, {}, {}, {}
327
343
328 # In certain scenarios (e.g. graft, update or rebase), base can be
344 # In certain scenarios (e.g. graft, update or rebase), base can be
329 # overridden. We still need to know a real common ancestor in this case. We
345 # overridden. We still need to know a real common ancestor in this case. We
330 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
346 # can't just compute _c1.ancestor(_c2) and compare it to ca, because there
331 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
347 # can be multiple common ancestors, e.g. in case of bidmerge. Because our
332 # caller may not know if the revision passed in lieu of the CA is a genuine
348 # caller may not know if the revision passed in lieu of the CA is a genuine
333 # common ancestor or not without explicitly checking it, it's better to
349 # common ancestor or not without explicitly checking it, it's better to
334 # determine that here.
350 # determine that here.
335 #
351 #
336 # base.descendant(wc) and base.descendant(base) are False, work around that
352 # base.descendant(wc) and base.descendant(base) are False, work around that
337 _c1 = c1.p1() if c1.rev() is None else c1
353 _c1 = c1.p1() if c1.rev() is None else c1
338 _c2 = c2.p1() if c2.rev() is None else c2
354 _c2 = c2.p1() if c2.rev() is None else c2
339 # an endpoint is "dirty" if it isn't a descendant of the merge base
355 # an endpoint is "dirty" if it isn't a descendant of the merge base
340 # if we have a dirty endpoint, we need to trigger graft logic, and also
356 # if we have a dirty endpoint, we need to trigger graft logic, and also
341 # keep track of which endpoint is dirty
357 # keep track of which endpoint is dirty
342 dirtyc1 = not (base == _c1 or base.descendant(_c1))
358 dirtyc1 = not (base == _c1 or base.descendant(_c1))
343 dirtyc2 = not (base== _c2 or base.descendant(_c2))
359 dirtyc2 = not (base== _c2 or base.descendant(_c2))
344 graft = dirtyc1 or dirtyc2
360 graft = dirtyc1 or dirtyc2
345 tca = base
361 tca = base
346 if graft:
362 if graft:
347 tca = _c1.ancestor(_c2)
363 tca = _c1.ancestor(_c2)
348
364
349 limit = _findlimit(repo, c1.rev(), c2.rev())
365 limit = _findlimit(repo, c1.rev(), c2.rev())
350 if limit is None:
366 if limit is None:
351 # no common ancestor, no copies
367 # no common ancestor, no copies
352 return {}, {}, {}, {}
368 return {}, {}, {}, {}
353 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
369 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
354
370
355 m1 = c1.manifest()
371 m1 = c1.manifest()
356 m2 = c2.manifest()
372 m2 = c2.manifest()
357 mb = base.manifest()
373 mb = base.manifest()
358
374
359 # gather data from _checkcopies:
375 # gather data from _checkcopies:
360 # - diverge = record all diverges in this dict
376 # - diverge = record all diverges in this dict
361 # - copy = record all non-divergent copies in this dict
377 # - copy = record all non-divergent copies in this dict
362 # - fullcopy = record all copies in this dict
378 # - fullcopy = record all copies in this dict
379 # - incomplete = record non-divergent partial copies here
380 # - incompletediverge = record divergent partial copies here
363 diverge = {} # divergence data is shared
381 diverge = {} # divergence data is shared
382 incompletediverge = {}
364 data1 = {'copy': {},
383 data1 = {'copy': {},
365 'fullcopy': {},
384 'fullcopy': {},
385 'incomplete': {},
366 'diverge': diverge,
386 'diverge': diverge,
387 'incompletediverge': incompletediverge,
367 }
388 }
368 data2 = {'copy': {},
389 data2 = {'copy': {},
369 'fullcopy': {},
390 'fullcopy': {},
391 'incomplete': {},
370 'diverge': diverge,
392 'diverge': diverge,
393 'incompletediverge': incompletediverge,
371 }
394 }
372
395
373 # find interesting file sets from manifests
396 # find interesting file sets from manifests
374 addedinm1 = m1.filesnotin(mb)
397 addedinm1 = m1.filesnotin(mb)
375 addedinm2 = m2.filesnotin(mb)
398 addedinm2 = m2.filesnotin(mb)
376 bothnew = sorted(addedinm1 & addedinm2)
399 bothnew = sorted(addedinm1 & addedinm2)
377 if tca == base:
400 if tca == base:
378 # unmatched file from base
401 # unmatched file from base
379 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
402 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
380 u1u, u2u = u1r, u2r
403 u1u, u2u = u1r, u2r
381 else:
404 else:
382 # unmatched file from base (DAG rotation in the graft case)
405 # unmatched file from base (DAG rotation in the graft case)
383 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
406 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2,
384 baselabel='base')
407 baselabel='base')
385 # unmatched file from topological common ancestors (no DAG rotation)
408 # unmatched file from topological common ancestors (no DAG rotation)
386 # need to recompute this for directory move handling when grafting
409 # need to recompute this for directory move handling when grafting
387 mta = tca.manifest()
410 mta = tca.manifest()
388 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
411 u1u, u2u = _computenonoverlap(repo, c1, c2, m1.filesnotin(mta),
389 m2.filesnotin(mta),
412 m2.filesnotin(mta),
390 baselabel='topological common ancestor')
413 baselabel='topological common ancestor')
391
414
392 for f in u1u:
415 for f in u1u:
393 _checkcopies(c1, f, m1, m2, base, tca, limit, data1)
416 _checkcopies(c1, f, m1, m2, base, tca, limit, data1)
394
417
395 for f in u2u:
418 for f in u2u:
396 _checkcopies(c2, f, m2, m1, base, tca, limit, data2)
419 _checkcopies(c2, f, m2, m1, base, tca, limit, data2)
397
420
398 copy = dict(data1['copy'].items() + data2['copy'].items())
421 copy = dict(data1['copy'].items() + data2['copy'].items())
399 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
422 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
400
423
424 if dirtyc1:
425 _combinecopies(data2['incomplete'], data1['incomplete'], copy, diverge,
426 incompletediverge)
427 else:
428 _combinecopies(data1['incomplete'], data2['incomplete'], copy, diverge,
429 incompletediverge)
430
401 renamedelete = {}
431 renamedelete = {}
402 renamedeleteset = set()
432 renamedeleteset = set()
403 divergeset = set()
433 divergeset = set()
404 for of, fl in diverge.items():
434 for of, fl in diverge.items():
405 if len(fl) == 1 or of in c1 or of in c2:
435 if len(fl) == 1 or of in c1 or of in c2:
406 del diverge[of] # not actually divergent, or not a rename
436 del diverge[of] # not actually divergent, or not a rename
407 if of not in c1 and of not in c2:
437 if of not in c1 and of not in c2:
408 # renamed on one side, deleted on the other side, but filter
438 # renamed on one side, deleted on the other side, but filter
409 # out files that have been renamed and then deleted
439 # out files that have been renamed and then deleted
410 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
440 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
411 renamedeleteset.update(fl) # reverse map for below
441 renamedeleteset.update(fl) # reverse map for below
412 else:
442 else:
413 divergeset.update(fl) # reverse map for below
443 divergeset.update(fl) # reverse map for below
414
444
415 if bothnew:
445 if bothnew:
416 repo.ui.debug(" unmatched files new in both:\n %s\n"
446 repo.ui.debug(" unmatched files new in both:\n %s\n"
417 % "\n ".join(bothnew))
447 % "\n ".join(bothnew))
418 bothdiverge = {}
448 bothdiverge = {}
419 bothdata = {'copy': {},
449 bothincompletediverge = {}
450 both1 = {'copy': {},
420 'fullcopy': {},
451 'fullcopy': {},
452 'incomplete': {},
421 'diverge': bothdiverge,
453 'diverge': bothdiverge,
454 'incompletediverge': bothincompletediverge
455 }
456 both2 = {'copy': {},
457 'fullcopy': {},
458 'incomplete': {},
459 'diverge': bothdiverge,
460 'incompletediverge': bothincompletediverge
422 }
461 }
423 for f in bothnew:
462 for f in bothnew:
424 _checkcopies(c1, f, m1, m2, base, tca, limit, bothdata)
463 _checkcopies(c1, f, m1, m2, base, tca, limit, both1)
425 _checkcopies(c2, f, m2, m1, base, tca, limit, bothdata)
464 _checkcopies(c2, f, m2, m1, base, tca, limit, both2)
465 if dirtyc1:
466 assert both2['incomplete'] == {}
467 remainder = _combinecopies({}, both1['incomplete'], copy, bothdiverge,
468 bothincompletediverge)
469 else:
470 assert both1['incomplete'] == {}
471 remainder = _combinecopies({}, both2['incomplete'], copy, bothdiverge,
472 bothincompletediverge)
473 for f in remainder:
474 assert f not in bothdiverge
475 ic = remainder[f]
476 if ic[0] in (m1 if dirtyc1 else m2):
477 # backed-out rename on one side, but watch out for deleted files
478 bothdiverge[f] = ic
426 for of, fl in bothdiverge.items():
479 for of, fl in bothdiverge.items():
427 if len(fl) == 2 and fl[0] == fl[1]:
480 if len(fl) == 2 and fl[0] == fl[1]:
428 copy[fl[0]] = of # not actually divergent, just matching renames
481 copy[fl[0]] = of # not actually divergent, just matching renames
429
482
430 if fullcopy and repo.ui.debugflag:
483 if fullcopy and repo.ui.debugflag:
431 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
484 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
432 "% = renamed and deleted):\n")
485 "% = renamed and deleted):\n")
433 for f in sorted(fullcopy):
486 for f in sorted(fullcopy):
434 note = ""
487 note = ""
435 if f in copy:
488 if f in copy:
436 note += "*"
489 note += "*"
437 if f in divergeset:
490 if f in divergeset:
438 note += "!"
491 note += "!"
439 if f in renamedeleteset:
492 if f in renamedeleteset:
440 note += "%"
493 note += "%"
441 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
494 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
442 note))
495 note))
443 del divergeset
496 del divergeset
444
497
445 if not fullcopy:
498 if not fullcopy:
446 return copy, {}, diverge, renamedelete
499 return copy, {}, diverge, renamedelete
447
500
448 repo.ui.debug(" checking for directory renames\n")
501 repo.ui.debug(" checking for directory renames\n")
449
502
450 # generate a directory move map
503 # generate a directory move map
451 d1, d2 = c1.dirs(), c2.dirs()
504 d1, d2 = c1.dirs(), c2.dirs()
452 # Hack for adding '', which is not otherwise added, to d1 and d2
505 # Hack for adding '', which is not otherwise added, to d1 and d2
453 d1.addpath('/')
506 d1.addpath('/')
454 d2.addpath('/')
507 d2.addpath('/')
455 invalid = set()
508 invalid = set()
456 dirmove = {}
509 dirmove = {}
457
510
458 # examine each file copy for a potential directory move, which is
511 # examine each file copy for a potential directory move, which is
459 # when all the files in a directory are moved to a new directory
512 # when all the files in a directory are moved to a new directory
460 for dst, src in fullcopy.iteritems():
513 for dst, src in fullcopy.iteritems():
461 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
514 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
462 if dsrc in invalid:
515 if dsrc in invalid:
463 # already seen to be uninteresting
516 # already seen to be uninteresting
464 continue
517 continue
465 elif dsrc in d1 and ddst in d1:
518 elif dsrc in d1 and ddst in d1:
466 # directory wasn't entirely moved locally
519 # directory wasn't entirely moved locally
467 invalid.add(dsrc + "/")
520 invalid.add(dsrc + "/")
468 elif dsrc in d2 and ddst in d2:
521 elif dsrc in d2 and ddst in d2:
469 # directory wasn't entirely moved remotely
522 # directory wasn't entirely moved remotely
470 invalid.add(dsrc + "/")
523 invalid.add(dsrc + "/")
471 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
524 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
472 # files from the same directory moved to two different places
525 # files from the same directory moved to two different places
473 invalid.add(dsrc + "/")
526 invalid.add(dsrc + "/")
474 else:
527 else:
475 # looks good so far
528 # looks good so far
476 dirmove[dsrc + "/"] = ddst + "/"
529 dirmove[dsrc + "/"] = ddst + "/"
477
530
478 for i in invalid:
531 for i in invalid:
479 if i in dirmove:
532 if i in dirmove:
480 del dirmove[i]
533 del dirmove[i]
481 del d1, d2, invalid
534 del d1, d2, invalid
482
535
483 if not dirmove:
536 if not dirmove:
484 return copy, {}, diverge, renamedelete
537 return copy, {}, diverge, renamedelete
485
538
486 for d in dirmove:
539 for d in dirmove:
487 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
540 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
488 (d, dirmove[d]))
541 (d, dirmove[d]))
489
542
490 movewithdir = {}
543 movewithdir = {}
491 # check unaccounted nonoverlapping files against directory moves
544 # check unaccounted nonoverlapping files against directory moves
492 for f in u1r + u2r:
545 for f in u1r + u2r:
493 if f not in fullcopy:
546 if f not in fullcopy:
494 for d in dirmove:
547 for d in dirmove:
495 if f.startswith(d):
548 if f.startswith(d):
496 # new file added in a directory that was moved, move it
549 # new file added in a directory that was moved, move it
497 df = dirmove[d] + f[len(d):]
550 df = dirmove[d] + f[len(d):]
498 if df not in copy:
551 if df not in copy:
499 movewithdir[f] = df
552 movewithdir[f] = df
500 repo.ui.debug((" pending file src: '%s' -> "
553 repo.ui.debug((" pending file src: '%s' -> "
501 "dst: '%s'\n") % (f, df))
554 "dst: '%s'\n") % (f, df))
502 break
555 break
503
556
504 return copy, movewithdir, diverge, renamedelete
557 return copy, movewithdir, diverge, renamedelete
505
558
def _related(f1, f2, limit):
    """return True if f1 and f2 filectx have a common ancestor

    Walk back to common ancestor to see if the two files originate
    from the same file. Since workingfilectx's rev() is None it messes
    up the integer comparison logic, hence the pre-step check for
    None (f1 and f2 can only be workingfilectx's initially).

    On a match the common filectx (a truthy value) is returned rather
    than the literal True; False is returned when the files are
    unrelated or the walk leaves the relevant revision range.
    """

    if f1 == f2:
        return f1 # a match

    g1, g2 = f1.ancestors(), f2.ancestors()
    try:
        f1r, f2r = f1.linkrev(), f2.linkrev()

        # workingfilectx linkrevs are None; step to the first real
        # ancestor so the integer comparisons below are valid
        if f1r is None:
            f1 = next(g1)
        if f2r is None:
            f2 = next(g2)

        while True:
            f1r, f2r = f1.linkrev(), f2.linkrev()
            # step back whichever side sits at the higher revision so
            # the two walks converge on a potential common ancestor
            if f1r > f2r:
                f1 = next(g1)
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1 # a match
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False # copy no longer relevant
    except StopIteration:
        # one side ran out of ancestors without finding a match
        return False
539
592
def _checkcopies(ctx, f, m1, m2, base, tca, limit, data):
    """
    check possible copies of f from m1 to m2

    ctx = starting context for f in m1
    f = the filename to check (as in m1)
    m1 = the source manifest
    m2 = the destination manifest
    base = the changectx used as a merge base
    tca = topological common ancestor for graft-like scenarios
    limit = the rev number to not search beyond
    data = dictionary of dictionary to store copy data. (see mergecopies)

    Nothing is returned; results are accumulated into data['copy'],
    data['fullcopy'] and data['diverge'].

    note: limit is only an optimization, and there is no guarantee that
    irrelevant revisions will not be limited
    there is no easy way to make this algorithm stop in a guaranteed way
    once it "goes behind a certain revision".
    """

    mb = base.manifest()
    # Might be true if this call is about finding backward renames,
    # This happens in the case of grafts because the DAG is then rotated.
    # If the file exists in both the base and the source, we are not looking
    # for a rename on the source side, but on the part of the DAG that is
    # traversed backwards.
    #
    # In the case there is both backward and forward renames (before and after
    # the base) this is more complicated as we must detect a divergence.
    # We use 'backwards = False' in that case.
    backwards = base != tca and f in mb
    getfctx = _makegetfctx(ctx)

    # 'of' tracks the most recent name of f along the walked rename chain;
    # 'seen' holds every name already visited (f itself included) so that
    # loops in the history do not revisit a name.
    of = None
    seen = set([f])
    for oc in getfctx(f, m1[f]).ancestors():
        ocr = oc.linkrev()
        of = oc.path()
        if of in seen:
            # check limit late - grab last rename before
            if ocr < limit:
                break
            continue
        seen.add(of)

        # remember for dir rename detection
        if backwards:
            data['fullcopy'][of] = f # grafting backwards through renames
        else:
            data['fullcopy'][f] = of
        if of not in m2:
            continue # no match, keep looking
        if m2[of] == mb.get(of):
            return # no merge needed, quit early
        c2 = getfctx(of, m2[of])
        # c2 might be a plain new file on added on destination side that is
        # unrelated to the droids we are looking for.
        cr = _related(oc, c2, tca.rev())
        if cr and (of == f or of == c2.path()): # non-divergent
            if backwards:
                data['copy'][of] = f
            elif of in mb:
                data['copy'][f] = of
            else: # divergence w.r.t. graft CA on one side of topological CA
                for sf in seen:
                    if sf in mb:
                        assert sf not in data['diverge']
                        data['diverge'][sf] = [f, of]
                        break
            return

        if of in mb:
            data['diverge'].setdefault(of, []).append(f)
612
665
def duplicatecopies(repo, rev, fromrev, skiprev=None):
    '''record in the dirstate of rev the copies made between fromrev and rev

    When skiprev is given, it names a revision used to filter copy
    records: any copy that already occurs between fromrev and skiprev is
    not duplicated, even if it also appears in the fromrev->rev set.
    '''
    filtered = {}
    if (skiprev is not None and
        not repo.ui.configbool('experimental', 'disablecopytrace')):
        # disablecopytrace only skips this call, not the whole function:
        # this line is O(size of the repo) during a rebase, while the rest
        # stays cheap and is still required to carry copy metadata across
        # the rebase.
        filtered = pathcopies(repo[fromrev], repo[skiprev])
    for dest, source in pathcopies(repo[fromrev], repo[rev]).iteritems():
        # copies.pathcopies returns backward renames, so dest might not
        # actually be in the dirstate; only record entries tracked there
        if dest not in filtered and repo.dirstate[dest] in "nma":
            repo.dirstate.copy(source, dest)
General Comments 0
You need to be logged in to leave comments. Login now