##// END OF EJS Templates
copies: fix mergecopies doc mapping direction
Matt Mackall -
r16177:b8c1a8a5 default
parent child Browse files
Show More
@@ -1,366 +1,366 b''
1 # copies.py - copy detection for Mercurial
1 # copies.py - copy detection for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import util
8 import util
9 import heapq
9 import heapq
10
10
11 def _nonoverlap(d1, d2, d3):
11 def _nonoverlap(d1, d2, d3):
12 "Return list of elements in d1 not in d2 or d3"
12 "Return list of elements in d1 not in d2 or d3"
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14
14
15 def _dirname(f):
15 def _dirname(f):
16 s = f.rfind("/")
16 s = f.rfind("/")
17 if s == -1:
17 if s == -1:
18 return ""
18 return ""
19 return f[:s]
19 return f[:s]
20
20
21 def _dirs(files):
21 def _dirs(files):
22 d = set()
22 d = set()
23 for f in files:
23 for f in files:
24 f = _dirname(f)
24 f = _dirname(f)
25 while f not in d:
25 while f not in d:
26 d.add(f)
26 d.add(f)
27 f = _dirname(f)
27 f = _dirname(f)
28 return d
28 return d
29
29
30 def _findlimit(repo, a, b):
30 def _findlimit(repo, a, b):
31 """Find the earliest revision that's an ancestor of a or b but not both,
31 """Find the earliest revision that's an ancestor of a or b but not both,
32 None if no such revision exists.
32 None if no such revision exists.
33 """
33 """
34 # basic idea:
34 # basic idea:
35 # - mark a and b with different sides
35 # - mark a and b with different sides
36 # - if a parent's children are all on the same side, the parent is
36 # - if a parent's children are all on the same side, the parent is
37 # on that side, otherwise it is on no side
37 # on that side, otherwise it is on no side
38 # - walk the graph in topological order with the help of a heap;
38 # - walk the graph in topological order with the help of a heap;
39 # - add unseen parents to side map
39 # - add unseen parents to side map
40 # - clear side of any parent that has children on different sides
40 # - clear side of any parent that has children on different sides
41 # - track number of interesting revs that might still be on a side
41 # - track number of interesting revs that might still be on a side
42 # - track the lowest interesting rev seen
42 # - track the lowest interesting rev seen
43 # - quit when interesting revs is zero
43 # - quit when interesting revs is zero
44
44
45 cl = repo.changelog
45 cl = repo.changelog
46 working = len(cl) # pseudo rev for the working directory
46 working = len(cl) # pseudo rev for the working directory
47 if a is None:
47 if a is None:
48 a = working
48 a = working
49 if b is None:
49 if b is None:
50 b = working
50 b = working
51
51
52 side = {a: -1, b: 1}
52 side = {a: -1, b: 1}
53 visit = [-a, -b]
53 visit = [-a, -b]
54 heapq.heapify(visit)
54 heapq.heapify(visit)
55 interesting = len(visit)
55 interesting = len(visit)
56 hascommonancestor = False
56 hascommonancestor = False
57 limit = working
57 limit = working
58
58
59 while interesting:
59 while interesting:
60 r = -heapq.heappop(visit)
60 r = -heapq.heappop(visit)
61 if r == working:
61 if r == working:
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 else:
63 else:
64 parents = cl.parentrevs(r)
64 parents = cl.parentrevs(r)
65 for p in parents:
65 for p in parents:
66 if p < 0:
66 if p < 0:
67 continue
67 continue
68 if p not in side:
68 if p not in side:
69 # first time we see p; add it to visit
69 # first time we see p; add it to visit
70 side[p] = side[r]
70 side[p] = side[r]
71 if side[p]:
71 if side[p]:
72 interesting += 1
72 interesting += 1
73 heapq.heappush(visit, -p)
73 heapq.heappush(visit, -p)
74 elif side[p] and side[p] != side[r]:
74 elif side[p] and side[p] != side[r]:
75 # p was interesting but now we know better
75 # p was interesting but now we know better
76 side[p] = 0
76 side[p] = 0
77 interesting -= 1
77 interesting -= 1
78 hascommonancestor = True
78 hascommonancestor = True
79 if side[r]:
79 if side[r]:
80 limit = r # lowest rev visited
80 limit = r # lowest rev visited
81 interesting -= 1
81 interesting -= 1
82
82
83 if not hascommonancestor:
83 if not hascommonancestor:
84 return None
84 return None
85 return limit
85 return limit
86
86
87 def _chain(src, dst, a, b):
87 def _chain(src, dst, a, b):
88 '''chain two sets of copies a->b'''
88 '''chain two sets of copies a->b'''
89 t = a.copy()
89 t = a.copy()
90 for k, v in b.iteritems():
90 for k, v in b.iteritems():
91 if v in t:
91 if v in t:
92 # found a chain
92 # found a chain
93 if t[v] != k:
93 if t[v] != k:
94 # file wasn't renamed back to itself
94 # file wasn't renamed back to itself
95 t[k] = t[v]
95 t[k] = t[v]
96 if v not in dst:
96 if v not in dst:
97 # chain was a rename, not a copy
97 # chain was a rename, not a copy
98 del t[v]
98 del t[v]
99 if v in src:
99 if v in src:
100 # file is a copy of an existing file
100 # file is a copy of an existing file
101 t[k] = v
101 t[k] = v
102
102
103 # remove criss-crossed copies
103 # remove criss-crossed copies
104 for k, v in t.items():
104 for k, v in t.items():
105 if k in src and v in dst:
105 if k in src and v in dst:
106 del t[k]
106 del t[k]
107
107
108 return t
108 return t
109
109
110 def _tracefile(fctx, actx):
110 def _tracefile(fctx, actx):
111 '''return file context that is the ancestor of fctx present in actx'''
111 '''return file context that is the ancestor of fctx present in actx'''
112 stop = actx.rev()
112 stop = actx.rev()
113 am = actx.manifest()
113 am = actx.manifest()
114
114
115 for f in fctx.ancestors():
115 for f in fctx.ancestors():
116 if am.get(f.path(), None) == f.filenode():
116 if am.get(f.path(), None) == f.filenode():
117 return f
117 return f
118 if f.rev() < stop:
118 if f.rev() < stop:
119 return None
119 return None
120
120
121 def _dirstatecopies(d):
121 def _dirstatecopies(d):
122 ds = d._repo.dirstate
122 ds = d._repo.dirstate
123 c = ds.copies().copy()
123 c = ds.copies().copy()
124 for k in c.keys():
124 for k in c.keys():
125 if ds[k] not in 'anm':
125 if ds[k] not in 'anm':
126 del c[k]
126 del c[k]
127 return c
127 return c
128
128
129 def _forwardcopies(a, b):
129 def _forwardcopies(a, b):
130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
131
131
132 # check for working copy
132 # check for working copy
133 w = None
133 w = None
134 if b.rev() is None:
134 if b.rev() is None:
135 w = b
135 w = b
136 b = w.p1()
136 b = w.p1()
137 if a == b:
137 if a == b:
138 # short-circuit to avoid issues with merge states
138 # short-circuit to avoid issues with merge states
139 return _dirstatecopies(w)
139 return _dirstatecopies(w)
140
140
141 # find where new files came from
141 # find where new files came from
142 # we currently don't try to find where old files went, too expensive
142 # we currently don't try to find where old files went, too expensive
143 # this means we can miss a case like 'hg rm b; hg cp a b'
143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 cm = {}
144 cm = {}
145 for f in b:
145 for f in b:
146 if f not in a:
146 if f not in a:
147 ofctx = _tracefile(b[f], a)
147 ofctx = _tracefile(b[f], a)
148 if ofctx:
148 if ofctx:
149 cm[f] = ofctx.path()
149 cm[f] = ofctx.path()
150
150
151 # combine copies from dirstate if necessary
151 # combine copies from dirstate if necessary
152 if w is not None:
152 if w is not None:
153 cm = _chain(a, w, cm, _dirstatecopies(w))
153 cm = _chain(a, w, cm, _dirstatecopies(w))
154
154
155 return cm
155 return cm
156
156
157 def _backwardcopies(a, b):
157 def _backwardcopies(a, b):
158 # because the forward mapping is 1:n, we can lose renames here
158 # because the forward mapping is 1:n, we can lose renames here
159 # in particular, we find renames better than copies
159 # in particular, we find renames better than copies
160 f = _forwardcopies(b, a)
160 f = _forwardcopies(b, a)
161 r = {}
161 r = {}
162 for k, v in f.iteritems():
162 for k, v in f.iteritems():
163 r[v] = k
163 r[v] = k
164 return r
164 return r
165
165
166 def pathcopies(x, y):
166 def pathcopies(x, y):
167 '''find {dst@y: src@x} copy mapping for directed compare'''
167 '''find {dst@y: src@x} copy mapping for directed compare'''
168 if x == y or not x or not y:
168 if x == y or not x or not y:
169 return {}
169 return {}
170 a = y.ancestor(x)
170 a = y.ancestor(x)
171 if a == x:
171 if a == x:
172 return _forwardcopies(x, y)
172 return _forwardcopies(x, y)
173 if a == y:
173 if a == y:
174 return _backwardcopies(x, y)
174 return _backwardcopies(x, y)
175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
176
176
177 def mergecopies(repo, c1, c2, ca):
177 def mergecopies(repo, c1, c2, ca):
178 """
178 """
179 Find moves and copies between context c1 and c2 that are relevant
179 Find moves and copies between context c1 and c2 that are relevant
180 for merging.
180 for merging.
181
181
182 Returns two dicts, "copy" and "diverge".
182 Returns two dicts, "copy" and "diverge".
183
183
184 "copy" is a mapping from source name -> destination name,
184 "copy" is a mapping from destination name -> source name,
185 where source is in c1 and destination is in c2 or vice-versa.
185 where source is in c1 and destination is in c2 or vice-versa.
186
186
187 "diverge" is a mapping of source name -> list of destination names
187 "diverge" is a mapping of source name -> list of destination names
188 for divergent renames.
188 for divergent renames.
189 """
189 """
190 # avoid silly behavior for update from empty dir
190 # avoid silly behavior for update from empty dir
191 if not c1 or not c2 or c1 == c2:
191 if not c1 or not c2 or c1 == c2:
192 return {}, {}
192 return {}, {}
193
193
194 # avoid silly behavior for parent -> working dir
194 # avoid silly behavior for parent -> working dir
195 if c2.node() is None and c1.node() == repo.dirstate.p1():
195 if c2.node() is None and c1.node() == repo.dirstate.p1():
196 return repo.dirstate.copies(), {}
196 return repo.dirstate.copies(), {}
197
197
198 limit = _findlimit(repo, c1.rev(), c2.rev())
198 limit = _findlimit(repo, c1.rev(), c2.rev())
199 if limit is None:
199 if limit is None:
200 # no common ancestor, no copies
200 # no common ancestor, no copies
201 return {}, {}
201 return {}, {}
202 m1 = c1.manifest()
202 m1 = c1.manifest()
203 m2 = c2.manifest()
203 m2 = c2.manifest()
204 ma = ca.manifest()
204 ma = ca.manifest()
205
205
206 def makectx(f, n):
206 def makectx(f, n):
207 if len(n) != 20: # in a working context?
207 if len(n) != 20: # in a working context?
208 if c1.rev() is None:
208 if c1.rev() is None:
209 return c1.filectx(f)
209 return c1.filectx(f)
210 return c2.filectx(f)
210 return c2.filectx(f)
211 return repo.filectx(f, fileid=n)
211 return repo.filectx(f, fileid=n)
212
212
213 ctx = util.lrucachefunc(makectx)
213 ctx = util.lrucachefunc(makectx)
214 copy = {}
214 copy = {}
215 fullcopy = {}
215 fullcopy = {}
216 diverge = {}
216 diverge = {}
217
217
218 def related(f1, f2, limit):
218 def related(f1, f2, limit):
219 # Walk back to common ancestor to see if the two files originate
219 # Walk back to common ancestor to see if the two files originate
220 # from the same file. Since workingfilectx's rev() is None it messes
220 # from the same file. Since workingfilectx's rev() is None it messes
221 # up the integer comparison logic, hence the pre-step check for
221 # up the integer comparison logic, hence the pre-step check for
222 # None (f1 and f2 can only be workingfilectx's initially).
222 # None (f1 and f2 can only be workingfilectx's initially).
223
223
224 if f1 == f2:
224 if f1 == f2:
225 return f1 # a match
225 return f1 # a match
226
226
227 g1, g2 = f1.ancestors(), f2.ancestors()
227 g1, g2 = f1.ancestors(), f2.ancestors()
228 try:
228 try:
229 f1r, f2r = f1.rev(), f2.rev()
229 f1r, f2r = f1.rev(), f2.rev()
230
230
231 if f1r is None:
231 if f1r is None:
232 f1 = g1.next()
232 f1 = g1.next()
233 if f2r is None:
233 if f2r is None:
234 f2 = g2.next()
234 f2 = g2.next()
235
235
236 while True:
236 while True:
237 f1r, f2r = f1.rev(), f2.rev()
237 f1r, f2r = f1.rev(), f2.rev()
238 if f1r > f2r:
238 if f1r > f2r:
239 f1 = g1.next()
239 f1 = g1.next()
240 elif f2r > f1r:
240 elif f2r > f1r:
241 f2 = g2.next()
241 f2 = g2.next()
242 elif f1 == f2:
242 elif f1 == f2:
243 return f1 # a match
243 return f1 # a match
244 elif f1r == f2r or f1r < limit or f2r < limit:
244 elif f1r == f2r or f1r < limit or f2r < limit:
245 return False # copy no longer relevant
245 return False # copy no longer relevant
246 except StopIteration:
246 except StopIteration:
247 return False
247 return False
248
248
249 def checkcopies(f, m1, m2):
249 def checkcopies(f, m1, m2):
250 '''check possible copies of f from m1 to m2'''
250 '''check possible copies of f from m1 to m2'''
251 of = None
251 of = None
252 seen = set([f])
252 seen = set([f])
253 for oc in ctx(f, m1[f]).ancestors():
253 for oc in ctx(f, m1[f]).ancestors():
254 ocr = oc.rev()
254 ocr = oc.rev()
255 of = oc.path()
255 of = oc.path()
256 if of in seen:
256 if of in seen:
257 # check limit late - grab last rename before
257 # check limit late - grab last rename before
258 if ocr < limit:
258 if ocr < limit:
259 break
259 break
260 continue
260 continue
261 seen.add(of)
261 seen.add(of)
262
262
263 fullcopy[f] = of # remember for dir rename detection
263 fullcopy[f] = of # remember for dir rename detection
264 if of not in m2:
264 if of not in m2:
265 continue # no match, keep looking
265 continue # no match, keep looking
266 if m2[of] == ma.get(of):
266 if m2[of] == ma.get(of):
267 break # no merge needed, quit early
267 break # no merge needed, quit early
268 c2 = ctx(of, m2[of])
268 c2 = ctx(of, m2[of])
269 cr = related(oc, c2, ca.rev())
269 cr = related(oc, c2, ca.rev())
270 if cr and (of == f or of == c2.path()): # non-divergent
270 if cr and (of == f or of == c2.path()): # non-divergent
271 copy[f] = of
271 copy[f] = of
272 of = None
272 of = None
273 break
273 break
274
274
275 if of in ma:
275 if of in ma:
276 diverge.setdefault(of, []).append(f)
276 diverge.setdefault(of, []).append(f)
277
277
278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
279
279
280 u1 = _nonoverlap(m1, m2, ma)
280 u1 = _nonoverlap(m1, m2, ma)
281 u2 = _nonoverlap(m2, m1, ma)
281 u2 = _nonoverlap(m2, m1, ma)
282
282
283 if u1:
283 if u1:
284 repo.ui.debug(" unmatched files in local:\n %s\n"
284 repo.ui.debug(" unmatched files in local:\n %s\n"
285 % "\n ".join(u1))
285 % "\n ".join(u1))
286 if u2:
286 if u2:
287 repo.ui.debug(" unmatched files in other:\n %s\n"
287 repo.ui.debug(" unmatched files in other:\n %s\n"
288 % "\n ".join(u2))
288 % "\n ".join(u2))
289
289
290 for f in u1:
290 for f in u1:
291 checkcopies(f, m1, m2)
291 checkcopies(f, m1, m2)
292 for f in u2:
292 for f in u2:
293 checkcopies(f, m2, m1)
293 checkcopies(f, m2, m1)
294
294
295 diverge2 = set()
295 diverge2 = set()
296 for of, fl in diverge.items():
296 for of, fl in diverge.items():
297 if len(fl) == 1 or of in c2:
297 if len(fl) == 1 or of in c2:
298 del diverge[of] # not actually divergent, or not a rename
298 del diverge[of] # not actually divergent, or not a rename
299 else:
299 else:
300 diverge2.update(fl) # reverse map for below
300 diverge2.update(fl) # reverse map for below
301
301
302 if fullcopy:
302 if fullcopy:
303 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
303 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
304 for f in fullcopy:
304 for f in fullcopy:
305 note = ""
305 note = ""
306 if f in copy:
306 if f in copy:
307 note += "*"
307 note += "*"
308 if f in diverge2:
308 if f in diverge2:
309 note += "!"
309 note += "!"
310 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
310 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
311 del diverge2
311 del diverge2
312
312
313 if not fullcopy:
313 if not fullcopy:
314 return copy, diverge
314 return copy, diverge
315
315
316 repo.ui.debug(" checking for directory renames\n")
316 repo.ui.debug(" checking for directory renames\n")
317
317
318 # generate a directory move map
318 # generate a directory move map
319 d1, d2 = _dirs(m1), _dirs(m2)
319 d1, d2 = _dirs(m1), _dirs(m2)
320 invalid = set()
320 invalid = set()
321 dirmove = {}
321 dirmove = {}
322
322
323 # examine each file copy for a potential directory move, which is
323 # examine each file copy for a potential directory move, which is
324 # when all the files in a directory are moved to a new directory
324 # when all the files in a directory are moved to a new directory
325 for dst, src in fullcopy.iteritems():
325 for dst, src in fullcopy.iteritems():
326 dsrc, ddst = _dirname(src), _dirname(dst)
326 dsrc, ddst = _dirname(src), _dirname(dst)
327 if dsrc in invalid:
327 if dsrc in invalid:
328 # already seen to be uninteresting
328 # already seen to be uninteresting
329 continue
329 continue
330 elif dsrc in d1 and ddst in d1:
330 elif dsrc in d1 and ddst in d1:
331 # directory wasn't entirely moved locally
331 # directory wasn't entirely moved locally
332 invalid.add(dsrc)
332 invalid.add(dsrc)
333 elif dsrc in d2 and ddst in d2:
333 elif dsrc in d2 and ddst in d2:
334 # directory wasn't entirely moved remotely
334 # directory wasn't entirely moved remotely
335 invalid.add(dsrc)
335 invalid.add(dsrc)
336 elif dsrc in dirmove and dirmove[dsrc] != ddst:
336 elif dsrc in dirmove and dirmove[dsrc] != ddst:
337 # files from the same directory moved to two different places
337 # files from the same directory moved to two different places
338 invalid.add(dsrc)
338 invalid.add(dsrc)
339 else:
339 else:
340 # looks good so far
340 # looks good so far
341 dirmove[dsrc + "/"] = ddst + "/"
341 dirmove[dsrc + "/"] = ddst + "/"
342
342
343 for i in invalid:
343 for i in invalid:
344 if i in dirmove:
344 if i in dirmove:
345 del dirmove[i]
345 del dirmove[i]
346 del d1, d2, invalid
346 del d1, d2, invalid
347
347
348 if not dirmove:
348 if not dirmove:
349 return copy, diverge
349 return copy, diverge
350
350
351 for d in dirmove:
351 for d in dirmove:
352 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
352 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
353
353
354 # check unaccounted nonoverlapping files against directory moves
354 # check unaccounted nonoverlapping files against directory moves
355 for f in u1 + u2:
355 for f in u1 + u2:
356 if f not in fullcopy:
356 if f not in fullcopy:
357 for d in dirmove:
357 for d in dirmove:
358 if f.startswith(d):
358 if f.startswith(d):
359 # new file added in a directory that was moved, move it
359 # new file added in a directory that was moved, move it
360 df = dirmove[d] + f[len(d):]
360 df = dirmove[d] + f[len(d):]
361 if df not in copy:
361 if df not in copy:
362 copy[f] = df
362 copy[f] = df
363 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
363 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
364 break
364 break
365
365
366 return copy, diverge
366 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now