##// END OF EJS Templates
checkcopies: pass data as a dictionary of dictionaries...
Pierre-Yves David -
r30184:7321c6b0 default
parent child Browse files
Show More
@@ -1,563 +1,574 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import heapq
10 import heapq
11
11
12 from . import (
12 from . import (
13 node,
13 node,
14 pathutil,
14 pathutil,
15 scmutil,
15 scmutil,
16 util,
16 util,
17 )
17 )
18
18
19 def _findlimit(repo, a, b):
19 def _findlimit(repo, a, b):
20 """
20 """
21 Find the last revision that needs to be checked to ensure that a full
21 Find the last revision that needs to be checked to ensure that a full
22 transitive closure for file copies can be properly calculated.
22 transitive closure for file copies can be properly calculated.
23 Generally, this means finding the earliest revision number that's an
23 Generally, this means finding the earliest revision number that's an
24 ancestor of a or b but not both, except when a or b is a direct descendant
24 ancestor of a or b but not both, except when a or b is a direct descendant
25 of the other, in which case we can return the minimum revnum of a and b.
25 of the other, in which case we can return the minimum revnum of a and b.
26 None if no such revision exists.
26 None if no such revision exists.
27 """
27 """
28
28
29 # basic idea:
29 # basic idea:
30 # - mark a and b with different sides
30 # - mark a and b with different sides
31 # - if a parent's children are all on the same side, the parent is
31 # - if a parent's children are all on the same side, the parent is
32 # on that side, otherwise it is on no side
32 # on that side, otherwise it is on no side
33 # - walk the graph in topological order with the help of a heap;
33 # - walk the graph in topological order with the help of a heap;
34 # - add unseen parents to side map
34 # - add unseen parents to side map
35 # - clear side of any parent that has children on different sides
35 # - clear side of any parent that has children on different sides
36 # - track number of interesting revs that might still be on a side
36 # - track number of interesting revs that might still be on a side
37 # - track the lowest interesting rev seen
37 # - track the lowest interesting rev seen
38 # - quit when interesting revs is zero
38 # - quit when interesting revs is zero
39
39
40 cl = repo.changelog
40 cl = repo.changelog
41 working = len(cl) # pseudo rev for the working directory
41 working = len(cl) # pseudo rev for the working directory
42 if a is None:
42 if a is None:
43 a = working
43 a = working
44 if b is None:
44 if b is None:
45 b = working
45 b = working
46
46
47 side = {a: -1, b: 1}
47 side = {a: -1, b: 1}
48 visit = [-a, -b]
48 visit = [-a, -b]
49 heapq.heapify(visit)
49 heapq.heapify(visit)
50 interesting = len(visit)
50 interesting = len(visit)
51 hascommonancestor = False
51 hascommonancestor = False
52 limit = working
52 limit = working
53
53
54 while interesting:
54 while interesting:
55 r = -heapq.heappop(visit)
55 r = -heapq.heappop(visit)
56 if r == working:
56 if r == working:
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
58 else:
58 else:
59 parents = cl.parentrevs(r)
59 parents = cl.parentrevs(r)
60 for p in parents:
60 for p in parents:
61 if p < 0:
61 if p < 0:
62 continue
62 continue
63 if p not in side:
63 if p not in side:
64 # first time we see p; add it to visit
64 # first time we see p; add it to visit
65 side[p] = side[r]
65 side[p] = side[r]
66 if side[p]:
66 if side[p]:
67 interesting += 1
67 interesting += 1
68 heapq.heappush(visit, -p)
68 heapq.heappush(visit, -p)
69 elif side[p] and side[p] != side[r]:
69 elif side[p] and side[p] != side[r]:
70 # p was interesting but now we know better
70 # p was interesting but now we know better
71 side[p] = 0
71 side[p] = 0
72 interesting -= 1
72 interesting -= 1
73 hascommonancestor = True
73 hascommonancestor = True
74 if side[r]:
74 if side[r]:
75 limit = r # lowest rev visited
75 limit = r # lowest rev visited
76 interesting -= 1
76 interesting -= 1
77
77
78 if not hascommonancestor:
78 if not hascommonancestor:
79 return None
79 return None
80
80
81 # Consider the following flow (see test-commit-amend.t under issue4405):
81 # Consider the following flow (see test-commit-amend.t under issue4405):
82 # 1/ File 'a0' committed
82 # 1/ File 'a0' committed
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
84 # 3/ Move back to first commit
84 # 3/ Move back to first commit
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
87 #
87 #
88 # During the amend in step five, we will be in this state:
88 # During the amend in step five, we will be in this state:
89 #
89 #
90 # @ 3 temporary amend commit for a1-amend
90 # @ 3 temporary amend commit for a1-amend
91 # |
91 # |
92 # o 2 a1-amend
92 # o 2 a1-amend
93 # |
93 # |
94 # | o 1 a1
94 # | o 1 a1
95 # |/
95 # |/
96 # o 0 a0
96 # o 0 a0
97 #
97 #
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
99 # yet the filelog has the copy information in rev 1 and we will not look
99 # yet the filelog has the copy information in rev 1 and we will not look
100 # back far enough unless we also look at the a and b as candidates.
100 # back far enough unless we also look at the a and b as candidates.
101 # This only occurs when a is a descendant of b or vice versa.
101 # This only occurs when a is a descendant of b or vice versa.
102 return min(limit, a, b)
102 return min(limit, a, b)
103
103
104 def _chain(src, dst, a, b):
104 def _chain(src, dst, a, b):
105 '''chain two sets of copies a->b'''
105 '''chain two sets of copies a->b'''
106 t = a.copy()
106 t = a.copy()
107 for k, v in b.iteritems():
107 for k, v in b.iteritems():
108 if v in t:
108 if v in t:
109 # found a chain
109 # found a chain
110 if t[v] != k:
110 if t[v] != k:
111 # file wasn't renamed back to itself
111 # file wasn't renamed back to itself
112 t[k] = t[v]
112 t[k] = t[v]
113 if v not in dst:
113 if v not in dst:
114 # chain was a rename, not a copy
114 # chain was a rename, not a copy
115 del t[v]
115 del t[v]
116 if v in src:
116 if v in src:
117 # file is a copy of an existing file
117 # file is a copy of an existing file
118 t[k] = v
118 t[k] = v
119
119
120 # remove criss-crossed copies
120 # remove criss-crossed copies
121 for k, v in t.items():
121 for k, v in t.items():
122 if k in src and v in dst:
122 if k in src and v in dst:
123 del t[k]
123 del t[k]
124
124
125 return t
125 return t
126
126
127 def _tracefile(fctx, am, limit=-1):
127 def _tracefile(fctx, am, limit=-1):
128 '''return file context that is the ancestor of fctx present in ancestor
128 '''return file context that is the ancestor of fctx present in ancestor
129 manifest am, stopping after the first ancestor lower than limit'''
129 manifest am, stopping after the first ancestor lower than limit'''
130
130
131 for f in fctx.ancestors():
131 for f in fctx.ancestors():
132 if am.get(f.path(), None) == f.filenode():
132 if am.get(f.path(), None) == f.filenode():
133 return f
133 return f
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 return None
135 return None
136
136
137 def _dirstatecopies(d):
137 def _dirstatecopies(d):
138 ds = d._repo.dirstate
138 ds = d._repo.dirstate
139 c = ds.copies().copy()
139 c = ds.copies().copy()
140 for k in c.keys():
140 for k in c.keys():
141 if ds[k] not in 'anm':
141 if ds[k] not in 'anm':
142 del c[k]
142 del c[k]
143 return c
143 return c
144
144
145 def _computeforwardmissing(a, b, match=None):
145 def _computeforwardmissing(a, b, match=None):
146 """Computes which files are in b but not a.
146 """Computes which files are in b but not a.
147 This is its own function so extensions can easily wrap this call to see what
147 This is its own function so extensions can easily wrap this call to see what
148 files _forwardcopies is about to process.
148 files _forwardcopies is about to process.
149 """
149 """
150 ma = a.manifest()
150 ma = a.manifest()
151 mb = b.manifest()
151 mb = b.manifest()
152 if match:
152 if match:
153 ma = ma.matches(match)
153 ma = ma.matches(match)
154 mb = mb.matches(match)
154 mb = mb.matches(match)
155 return mb.filesnotin(ma)
155 return mb.filesnotin(ma)
156
156
157 def _forwardcopies(a, b, match=None):
157 def _forwardcopies(a, b, match=None):
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159
159
160 # check for working copy
160 # check for working copy
161 w = None
161 w = None
162 if b.rev() is None:
162 if b.rev() is None:
163 w = b
163 w = b
164 b = w.p1()
164 b = w.p1()
165 if a == b:
165 if a == b:
166 # short-circuit to avoid issues with merge states
166 # short-circuit to avoid issues with merge states
167 return _dirstatecopies(w)
167 return _dirstatecopies(w)
168
168
169 # files might have to be traced back to the fctx parent of the last
169 # files might have to be traced back to the fctx parent of the last
170 # one-side-only changeset, but not further back than that
170 # one-side-only changeset, but not further back than that
171 limit = _findlimit(a._repo, a.rev(), b.rev())
171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 if limit is None:
172 if limit is None:
173 limit = -1
173 limit = -1
174 am = a.manifest()
174 am = a.manifest()
175
175
176 # find where new files came from
176 # find where new files came from
177 # we currently don't try to find where old files went, too expensive
177 # we currently don't try to find where old files went, too expensive
178 # this means we can miss a case like 'hg rm b; hg cp a b'
178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 cm = {}
179 cm = {}
180
180
181 # Computing the forward missing is quite expensive on large manifests, since
181 # Computing the forward missing is quite expensive on large manifests, since
182 # it compares the entire manifests. We can optimize it in the common use
182 # it compares the entire manifests. We can optimize it in the common use
183 # case of computing what copies are in a commit versus its parent (like
183 # case of computing what copies are in a commit versus its parent (like
184 # during a rebase or histedit). Note, we exclude merge commits from this
184 # during a rebase or histedit). Note, we exclude merge commits from this
185 # optimization, since the ctx.files() for a merge commit is not correct for
185 # optimization, since the ctx.files() for a merge commit is not correct for
186 # this comparison.
186 # this comparison.
187 forwardmissingmatch = match
187 forwardmissingmatch = match
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
188 if not match and b.p1() == a and b.p2().node() == node.nullid:
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191
191
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 for f in missing:
193 for f in missing:
194 fctx = b[f]
194 fctx = b[f]
195 fctx._ancestrycontext = ancestrycontext
195 fctx._ancestrycontext = ancestrycontext
196 ofctx = _tracefile(fctx, am, limit)
196 ofctx = _tracefile(fctx, am, limit)
197 if ofctx:
197 if ofctx:
198 cm[f] = ofctx.path()
198 cm[f] = ofctx.path()
199
199
200 # combine copies from dirstate if necessary
200 # combine copies from dirstate if necessary
201 if w is not None:
201 if w is not None:
202 cm = _chain(a, w, cm, _dirstatecopies(w))
202 cm = _chain(a, w, cm, _dirstatecopies(w))
203
203
204 return cm
204 return cm
205
205
206 def _backwardrenames(a, b):
206 def _backwardrenames(a, b):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
208 return {}
208 return {}
209
209
210 # Even though we're not taking copies into account, 1:n rename situations
210 # Even though we're not taking copies into account, 1:n rename situations
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 # arbitrarily pick one of the renames.
212 # arbitrarily pick one of the renames.
213 f = _forwardcopies(b, a)
213 f = _forwardcopies(b, a)
214 r = {}
214 r = {}
215 for k, v in sorted(f.iteritems()):
215 for k, v in sorted(f.iteritems()):
216 # remove copies
216 # remove copies
217 if v in a:
217 if v in a:
218 continue
218 continue
219 r[v] = k
219 r[v] = k
220 return r
220 return r
221
221
222 def pathcopies(x, y, match=None):
222 def pathcopies(x, y, match=None):
223 '''find {dst@y: src@x} copy mapping for directed compare'''
223 '''find {dst@y: src@x} copy mapping for directed compare'''
224 if x == y or not x or not y:
224 if x == y or not x or not y:
225 return {}
225 return {}
226 a = y.ancestor(x)
226 a = y.ancestor(x)
227 if a == x:
227 if a == x:
228 return _forwardcopies(x, y, match=match)
228 return _forwardcopies(x, y, match=match)
229 if a == y:
229 if a == y:
230 return _backwardrenames(x, y)
230 return _backwardrenames(x, y)
231 return _chain(x, y, _backwardrenames(x, a),
231 return _chain(x, y, _backwardrenames(x, a),
232 _forwardcopies(a, y, match=match))
232 _forwardcopies(a, y, match=match))
233
233
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 and c2. This is its own function so extensions can easily wrap this call
236 and c2. This is its own function so extensions can easily wrap this call
237 to see what files mergecopies is about to process.
237 to see what files mergecopies is about to process.
238
238
239 Even though c1 and c2 are not used in this function, they are useful in
239 Even though c1 and c2 are not used in this function, they are useful in
240 other extensions for being able to read the file nodes of the changed files.
240 other extensions for being able to read the file nodes of the changed files.
241 """
241 """
242 u1 = sorted(addedinm1 - addedinm2)
242 u1 = sorted(addedinm1 - addedinm2)
243 u2 = sorted(addedinm2 - addedinm1)
243 u2 = sorted(addedinm2 - addedinm1)
244
244
245 if u1:
245 if u1:
246 repo.ui.debug(" unmatched files in local:\n %s\n"
246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 % "\n ".join(u1))
247 % "\n ".join(u1))
248 if u2:
248 if u2:
249 repo.ui.debug(" unmatched files in other:\n %s\n"
249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 % "\n ".join(u2))
250 % "\n ".join(u2))
251 return u1, u2
251 return u1, u2
252
252
253 def _makegetfctx(ctx):
253 def _makegetfctx(ctx):
254 """return a 'getfctx' function suitable for _checkcopies usage
254 """return a 'getfctx' function suitable for _checkcopies usage
255
255
256 We have to re-setup the function building 'filectx' for each
256 We have to re-setup the function building 'filectx' for each
257 '_checkcopies' to ensure the linkrev adjustment is properly setup for
257 '_checkcopies' to ensure the linkrev adjustment is properly setup for
258 each. Linkrev adjustment is important to avoid bugs in rename
258 each. Linkrev adjustment is important to avoid bugs in rename
259 detection. Moreover, having a proper '_ancestrycontext' setup ensures
259 detection. Moreover, having a proper '_ancestrycontext' setup ensures
260 the performance impact of this adjustment is kept limited. Without it,
260 the performance impact of this adjustment is kept limited. Without it,
261 each file could do a full dag traversal making the time complexity of
261 each file could do a full dag traversal making the time complexity of
262 the operation explode (see issue4537).
262 the operation explode (see issue4537).
263
263
264 This function exists here mostly to limit the impact on stable. Feel
264 This function exists here mostly to limit the impact on stable. Feel
265 free to refactor on default.
265 free to refactor on default.
266 """
266 """
267 rev = ctx.rev()
267 rev = ctx.rev()
268 repo = ctx._repo
268 repo = ctx._repo
269 ac = getattr(ctx, '_ancestrycontext', None)
269 ac = getattr(ctx, '_ancestrycontext', None)
270 if ac is None:
270 if ac is None:
271 revs = [rev]
271 revs = [rev]
272 if rev is None:
272 if rev is None:
273 revs = [p.rev() for p in ctx.parents()]
273 revs = [p.rev() for p in ctx.parents()]
274 ac = repo.changelog.ancestors(revs, inclusive=True)
274 ac = repo.changelog.ancestors(revs, inclusive=True)
275 ctx._ancestrycontext = ac
275 ctx._ancestrycontext = ac
276 def makectx(f, n):
276 def makectx(f, n):
277 if len(n) != 20: # in a working context?
277 if len(n) != 20: # in a working context?
278 if ctx.rev() is None:
278 if ctx.rev() is None:
279 return ctx.filectx(f)
279 return ctx.filectx(f)
280 return repo[None][f]
280 return repo[None][f]
281 fctx = repo.filectx(f, fileid=n)
281 fctx = repo.filectx(f, fileid=n)
282 # setup only needed for filectx not created from a changectx
282 # setup only needed for filectx not created from a changectx
283 fctx._ancestrycontext = ac
283 fctx._ancestrycontext = ac
284 fctx._descendantrev = rev
284 fctx._descendantrev = rev
285 return fctx
285 return fctx
286 return util.lrucachefunc(makectx)
286 return util.lrucachefunc(makectx)
287
287
288 def mergecopies(repo, c1, c2, ca):
288 def mergecopies(repo, c1, c2, ca):
289 """
289 """
290 Find moves and copies between context c1 and c2 that are relevant
290 Find moves and copies between context c1 and c2 that are relevant
291 for merging.
291 for merging.
292
292
293 Returns four dicts: "copy", "movewithdir", "diverge", and
293 Returns four dicts: "copy", "movewithdir", "diverge", and
294 "renamedelete".
294 "renamedelete".
295
295
296 "copy" is a mapping from destination name -> source name,
296 "copy" is a mapping from destination name -> source name,
297 where source is in c1 and destination is in c2 or vice-versa.
297 where source is in c1 and destination is in c2 or vice-versa.
298
298
299 "movewithdir" is a mapping from source name -> destination name,
299 "movewithdir" is a mapping from source name -> destination name,
300 where the file at source present in one context but not the other
300 where the file at source present in one context but not the other
301 needs to be moved to destination by the merge process, because the
301 needs to be moved to destination by the merge process, because the
302 other context moved the directory it is in.
302 other context moved the directory it is in.
303
303
304 "diverge" is a mapping of source name -> list of destination names
304 "diverge" is a mapping of source name -> list of destination names
305 for divergent renames.
305 for divergent renames.
306
306
307 "renamedelete" is a mapping of source name -> list of destination
307 "renamedelete" is a mapping of source name -> list of destination
308 names for files deleted in c1 that were renamed in c2 or vice-versa.
308 names for files deleted in c1 that were renamed in c2 or vice-versa.
309 """
309 """
310 # avoid silly behavior for update from empty dir
310 # avoid silly behavior for update from empty dir
311 if not c1 or not c2 or c1 == c2:
311 if not c1 or not c2 or c1 == c2:
312 return {}, {}, {}, {}
312 return {}, {}, {}, {}
313
313
314 # avoid silly behavior for parent -> working dir
314 # avoid silly behavior for parent -> working dir
315 if c2.node() is None and c1.node() == repo.dirstate.p1():
315 if c2.node() is None and c1.node() == repo.dirstate.p1():
316 return repo.dirstate.copies(), {}, {}, {}
316 return repo.dirstate.copies(), {}, {}, {}
317
317
318 # Copy trace disabling is explicitly below the node == p1 logic above
318 # Copy trace disabling is explicitly below the node == p1 logic above
319 # because the logic above is required for a simple copy to be kept across a
319 # because the logic above is required for a simple copy to be kept across a
320 # rebase.
320 # rebase.
321 if repo.ui.configbool('experimental', 'disablecopytrace'):
321 if repo.ui.configbool('experimental', 'disablecopytrace'):
322 return {}, {}, {}, {}
322 return {}, {}, {}, {}
323
323
324 limit = _findlimit(repo, c1.rev(), c2.rev())
324 limit = _findlimit(repo, c1.rev(), c2.rev())
325 if limit is None:
325 if limit is None:
326 # no common ancestor, no copies
326 # no common ancestor, no copies
327 return {}, {}, {}, {}
327 return {}, {}, {}, {}
328 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
328 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
329
329
330 m1 = c1.manifest()
330 m1 = c1.manifest()
331 m2 = c2.manifest()
331 m2 = c2.manifest()
332 ma = ca.manifest()
332 ma = ca.manifest()
333
333
334 # see _checkcopies documentation below for these dicts
334 # see _checkcopies documentation below for these dicts
335 copy1, copy2 = {}, {}
335 diverge = {} # divergence data is shared
336 fullcopy1, fullcopy2 = {}, {}
336 data1 = {'copy': {},
337 diverge = {}
337 'fullcopy': {},
338 'diverge': diverge,
339 }
340 data2 = {'copy': {},
341 'fullcopy': {},
342 'diverge': diverge,
343 }
338
344
339 # find interesting file sets from manifests
345 # find interesting file sets from manifests
340 addedinm1 = m1.filesnotin(ma)
346 addedinm1 = m1.filesnotin(ma)
341 addedinm2 = m2.filesnotin(ma)
347 addedinm2 = m2.filesnotin(ma)
342 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
348 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
343 u1u, u2u = u1r, u2r
349 u1u, u2u = u1r, u2r
344 bothnew = sorted(addedinm1 & addedinm2)
350 bothnew = sorted(addedinm1 & addedinm2)
345
351
346 for f in u1u:
352 for f in u1u:
347 _checkcopies(c1, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
353 _checkcopies(c1, f, m1, m2, ca, limit, data1)
348
354
349 for f in u2u:
355 for f in u2u:
350 _checkcopies(c2, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
356 _checkcopies(c2, f, m2, m1, ca, limit, data2)
351
357
352 copy = dict(copy1.items() + copy2.items())
358 copy = dict(data1['copy'].items() + data2['copy'].items())
353 fullcopy = dict(fullcopy1.items() + fullcopy2.items())
359 fullcopy = dict(data1['fullcopy'].items() + data2['fullcopy'].items())
354
360
355 renamedelete = {}
361 renamedelete = {}
356 renamedeleteset = set()
362 renamedeleteset = set()
357 divergeset = set()
363 divergeset = set()
358 for of, fl in diverge.items():
364 for of, fl in diverge.items():
359 if len(fl) == 1 or of in c1 or of in c2:
365 if len(fl) == 1 or of in c1 or of in c2:
360 del diverge[of] # not actually divergent, or not a rename
366 del diverge[of] # not actually divergent, or not a rename
361 if of not in c1 and of not in c2:
367 if of not in c1 and of not in c2:
362 # renamed on one side, deleted on the other side, but filter
368 # renamed on one side, deleted on the other side, but filter
363 # out files that have been renamed and then deleted
369 # out files that have been renamed and then deleted
364 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
370 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
365 renamedeleteset.update(fl) # reverse map for below
371 renamedeleteset.update(fl) # reverse map for below
366 else:
372 else:
367 divergeset.update(fl) # reverse map for below
373 divergeset.update(fl) # reverse map for below
368
374
369 if bothnew:
375 if bothnew:
370 repo.ui.debug(" unmatched files new in both:\n %s\n"
376 repo.ui.debug(" unmatched files new in both:\n %s\n"
371 % "\n ".join(bothnew))
377 % "\n ".join(bothnew))
372 bothdiverge, _copy, _fullcopy = {}, {}, {}
378 bothdiverge = {}
379 bothdata = {'copy': {},
380 'fullcopy': {},
381 'diverge': bothdiverge,
382 }
373 for f in bothnew:
383 for f in bothnew:
374 _checkcopies(c1, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
384 _checkcopies(c1, f, m1, m2, ca, limit, bothdata)
375 _checkcopies(c2, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
385 _checkcopies(c2, f, m2, m1, ca, limit, bothdata)
376 for of, fl in bothdiverge.items():
386 for of, fl in bothdiverge.items():
377 if len(fl) == 2 and fl[0] == fl[1]:
387 if len(fl) == 2 and fl[0] == fl[1]:
378 copy[fl[0]] = of # not actually divergent, just matching renames
388 copy[fl[0]] = of # not actually divergent, just matching renames
379
389
380 if fullcopy and repo.ui.debugflag:
390 if fullcopy and repo.ui.debugflag:
381 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
391 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
382 "% = renamed and deleted):\n")
392 "% = renamed and deleted):\n")
383 for f in sorted(fullcopy):
393 for f in sorted(fullcopy):
384 note = ""
394 note = ""
385 if f in copy:
395 if f in copy:
386 note += "*"
396 note += "*"
387 if f in divergeset:
397 if f in divergeset:
388 note += "!"
398 note += "!"
389 if f in renamedeleteset:
399 if f in renamedeleteset:
390 note += "%"
400 note += "%"
391 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
401 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
392 note))
402 note))
393 del divergeset
403 del divergeset
394
404
395 if not fullcopy:
405 if not fullcopy:
396 return copy, {}, diverge, renamedelete
406 return copy, {}, diverge, renamedelete
397
407
398 repo.ui.debug(" checking for directory renames\n")
408 repo.ui.debug(" checking for directory renames\n")
399
409
400 # generate a directory move map
410 # generate a directory move map
401 d1, d2 = c1.dirs(), c2.dirs()
411 d1, d2 = c1.dirs(), c2.dirs()
402 # Hack for adding '', which is not otherwise added, to d1 and d2
412 # Hack for adding '', which is not otherwise added, to d1 and d2
403 d1.addpath('/')
413 d1.addpath('/')
404 d2.addpath('/')
414 d2.addpath('/')
405 invalid = set()
415 invalid = set()
406 dirmove = {}
416 dirmove = {}
407
417
408 # examine each file copy for a potential directory move, which is
418 # examine each file copy for a potential directory move, which is
409 # when all the files in a directory are moved to a new directory
419 # when all the files in a directory are moved to a new directory
410 for dst, src in fullcopy.iteritems():
420 for dst, src in fullcopy.iteritems():
411 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
421 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
412 if dsrc in invalid:
422 if dsrc in invalid:
413 # already seen to be uninteresting
423 # already seen to be uninteresting
414 continue
424 continue
415 elif dsrc in d1 and ddst in d1:
425 elif dsrc in d1 and ddst in d1:
416 # directory wasn't entirely moved locally
426 # directory wasn't entirely moved locally
417 invalid.add(dsrc + "/")
427 invalid.add(dsrc + "/")
418 elif dsrc in d2 and ddst in d2:
428 elif dsrc in d2 and ddst in d2:
419 # directory wasn't entirely moved remotely
429 # directory wasn't entirely moved remotely
420 invalid.add(dsrc + "/")
430 invalid.add(dsrc + "/")
421 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
431 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
422 # files from the same directory moved to two different places
432 # files from the same directory moved to two different places
423 invalid.add(dsrc + "/")
433 invalid.add(dsrc + "/")
424 else:
434 else:
425 # looks good so far
435 # looks good so far
426 dirmove[dsrc + "/"] = ddst + "/"
436 dirmove[dsrc + "/"] = ddst + "/"
427
437
428 for i in invalid:
438 for i in invalid:
429 if i in dirmove:
439 if i in dirmove:
430 del dirmove[i]
440 del dirmove[i]
431 del d1, d2, invalid
441 del d1, d2, invalid
432
442
433 if not dirmove:
443 if not dirmove:
434 return copy, {}, diverge, renamedelete
444 return copy, {}, diverge, renamedelete
435
445
436 for d in dirmove:
446 for d in dirmove:
437 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
447 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
438 (d, dirmove[d]))
448 (d, dirmove[d]))
439
449
440 movewithdir = {}
450 movewithdir = {}
441 # check unaccounted nonoverlapping files against directory moves
451 # check unaccounted nonoverlapping files against directory moves
442 for f in u1r + u2r:
452 for f in u1r + u2r:
443 if f not in fullcopy:
453 if f not in fullcopy:
444 for d in dirmove:
454 for d in dirmove:
445 if f.startswith(d):
455 if f.startswith(d):
446 # new file added in a directory that was moved, move it
456 # new file added in a directory that was moved, move it
447 df = dirmove[d] + f[len(d):]
457 df = dirmove[d] + f[len(d):]
448 if df not in copy:
458 if df not in copy:
449 movewithdir[f] = df
459 movewithdir[f] = df
450 repo.ui.debug((" pending file src: '%s' -> "
460 repo.ui.debug((" pending file src: '%s' -> "
451 "dst: '%s'\n") % (f, df))
461 "dst: '%s'\n") % (f, df))
452 break
462 break
453
463
454 return copy, movewithdir, diverge, renamedelete
464 return copy, movewithdir, diverge, renamedelete
455
465
456 def _related(f1, f2, limit):
466 def _related(f1, f2, limit):
457 """return True if f1 and f2 filectx have a common ancestor
467 """return True if f1 and f2 filectx have a common ancestor
458
468
459 Walk back to common ancestor to see if the two files originate
469 Walk back to common ancestor to see if the two files originate
460 from the same file. Since workingfilectx's rev() is None it messes
470 from the same file. Since workingfilectx's rev() is None it messes
461 up the integer comparison logic, hence the pre-step check for
471 up the integer comparison logic, hence the pre-step check for
462 None (f1 and f2 can only be workingfilectx's initially).
472 None (f1 and f2 can only be workingfilectx's initially).
463 """
473 """
464
474
465 if f1 == f2:
475 if f1 == f2:
466 return f1 # a match
476 return f1 # a match
467
477
468 g1, g2 = f1.ancestors(), f2.ancestors()
478 g1, g2 = f1.ancestors(), f2.ancestors()
469 try:
479 try:
470 f1r, f2r = f1.linkrev(), f2.linkrev()
480 f1r, f2r = f1.linkrev(), f2.linkrev()
471
481
472 if f1r is None:
482 if f1r is None:
473 f1 = next(g1)
483 f1 = next(g1)
474 if f2r is None:
484 if f2r is None:
475 f2 = next(g2)
485 f2 = next(g2)
476
486
477 while True:
487 while True:
478 f1r, f2r = f1.linkrev(), f2.linkrev()
488 f1r, f2r = f1.linkrev(), f2.linkrev()
479 if f1r > f2r:
489 if f1r > f2r:
480 f1 = next(g1)
490 f1 = next(g1)
481 elif f2r > f1r:
491 elif f2r > f1r:
482 f2 = next(g2)
492 f2 = next(g2)
483 elif f1 == f2:
493 elif f1 == f2:
484 return f1 # a match
494 return f1 # a match
485 elif f1r == f2r or f1r < limit or f2r < limit:
495 elif f1r == f2r or f1r < limit or f2r < limit:
486 return False # copy no longer relevant
496 return False # copy no longer relevant
487 except StopIteration:
497 except StopIteration:
488 return False
498 return False
489
499
490 def _checkcopies(ctx, f, m1, m2, base, limit, diverge, copy, fullcopy):
500 def _checkcopies(ctx, f, m1, m2, base, limit, data):
491 """
501 """
492 check possible copies of f from m1 to m2
502 check possible copies of f from m1 to m2
493
503
494 ctx = starting context for f in m1
504 ctx = starting context for f in m1
495 f = the filename to check (as in m1)
505 f = the filename to check (as in m1)
496 m1 = the source manifest
506 m1 = the source manifest
497 m2 = the destination manifest
507 m2 = the destination manifest
498 base = the changectx used as a merge base
508 base = the changectx used as a merge base
499 limit = the rev number to not search beyond
509 limit = the rev number to not search beyond
500 diverge = record all diverges in this dict
510 data = dictionary of dictionaries to store copy data. The keys are:
501 copy = record all non-divergent copies in this dict
511 - diverge = record all diverges in this dict
502 fullcopy = record all copies in this dict
512 - copy = record all non-divergent copies in this dict
513 - fullcopy = record all copies in this dict
503
514
504 note: limit is only an optimization, and there is no guarantee that
515 note: limit is only an optimization, and there is no guarantee that
505 irrelevant revisions will not be visited
516 irrelevant revisions will not be visited
506 there is no easy way to make this algorithm stop in a guaranteed way
517 there is no easy way to make this algorithm stop in a guaranteed way
507 once it "goes behind a certain revision".
518 once it "goes behind a certain revision".
508 """
519 """
509
520
510 mb = base.manifest()
521 mb = base.manifest()
511 getfctx = _makegetfctx(ctx)
522 getfctx = _makegetfctx(ctx)
512
523
513 of = None
524 of = None
514 seen = set([f])
525 seen = set([f])
515 for oc in getfctx(f, m1[f]).ancestors():
526 for oc in getfctx(f, m1[f]).ancestors():
516 ocr = oc.linkrev()
527 ocr = oc.linkrev()
517 of = oc.path()
528 of = oc.path()
518 if of in seen:
529 if of in seen:
519 # check limit late - grab last rename before
530 # check limit late - grab last rename before
520 if ocr < limit:
531 if ocr < limit:
521 break
532 break
522 continue
533 continue
523 seen.add(of)
534 seen.add(of)
524
535
525 fullcopy[f] = of # remember for dir rename detection
536 data['fullcopy'][f] = of # remember for dir rename detection
526 if of not in m2:
537 if of not in m2:
527 continue # no match, keep looking
538 continue # no match, keep looking
528 if m2[of] == mb.get(of):
539 if m2[of] == mb.get(of):
529 return # no merge needed, quit early
540 return # no merge needed, quit early
530 c2 = getfctx(of, m2[of])
541 c2 = getfctx(of, m2[of])
531 # c2 might be a plain new file added on the destination side that is
542 # c2 might be a plain new file added on the destination side that is
532 # unrelated to the droids we are looking for.
543 # unrelated to the droids we are looking for.
533 cr = _related(oc, c2, base.rev())
544 cr = _related(oc, c2, base.rev())
534 if cr and (of == f or of == c2.path()): # non-divergent
545 if cr and (of == f or of == c2.path()): # non-divergent
535 copy[f] = of
546 data['copy'][f] = of
536 return
547 return
537
548
538 if of in mb:
549 if of in mb:
539 diverge.setdefault(of, []).append(f)
550 data['diverge'].setdefault(of, []).append(f)
540
551
541 def duplicatecopies(repo, rev, fromrev, skiprev=None):
552 def duplicatecopies(repo, rev, fromrev, skiprev=None):
542 '''reproduce copies from fromrev to rev in the dirstate
553 '''reproduce copies from fromrev to rev in the dirstate
543
554
544 If skiprev is specified, it's a revision that should be used to
555 If skiprev is specified, it's a revision that should be used to
545 filter copy records. Any copies that occur between fromrev and
556 filter copy records. Any copies that occur between fromrev and
546 skiprev will not be duplicated, even if they appear in the set of
557 skiprev will not be duplicated, even if they appear in the set of
547 copies between fromrev and rev.
558 copies between fromrev and rev.
548 '''
559 '''
549 exclude = {}
560 exclude = {}
550 if (skiprev is not None and
561 if (skiprev is not None and
551 not repo.ui.configbool('experimental', 'disablecopytrace')):
562 not repo.ui.configbool('experimental', 'disablecopytrace')):
552 # disablecopytrace skips this line, but not the entire function because
563 # disablecopytrace skips this line, but not the entire function because
553 # the line below is O(size of the repo) during a rebase, while the rest
564 # the line below is O(size of the repo) during a rebase, while the rest
554 # of the function is much faster (and is required for carrying copy
565 # of the function is much faster (and is required for carrying copy
555 # metadata across the rebase anyway).
566 # metadata across the rebase anyway).
556 exclude = pathcopies(repo[fromrev], repo[skiprev])
567 exclude = pathcopies(repo[fromrev], repo[skiprev])
557 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
568 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
558 # copies.pathcopies returns backward renames, so dst might not
569 # copies.pathcopies returns backward renames, so dst might not
559 # actually be in the dirstate
570 # actually be in the dirstate
560 if dst in exclude:
571 if dst in exclude:
561 continue
572 continue
562 if repo.dirstate[dst] in "nma":
573 if repo.dirstate[dst] in "nma":
563 repo.dirstate.copy(src, dst)
574 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now