##// END OF EJS Templates
copies: fix mergecopies doc mapping direction
Matt Mackall -
r16177:b8c1a8a5 default
parent child Browse files
Show More
@@ -1,366 +1,366 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _nonoverlap(d1, d2, d3):
12 12 "Return list of elements in d1 not in d2 or d3"
13 13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14 14
15 15 def _dirname(f):
16 16 s = f.rfind("/")
17 17 if s == -1:
18 18 return ""
19 19 return f[:s]
20 20
21 21 def _dirs(files):
22 22 d = set()
23 23 for f in files:
24 24 f = _dirname(f)
25 25 while f not in d:
26 26 d.add(f)
27 27 f = _dirname(f)
28 28 return d
29 29
30 30 def _findlimit(repo, a, b):
31 31 """Find the earliest revision that's an ancestor of a or b but not both,
32 32 None if no such revision exists.
33 33 """
34 34 # basic idea:
35 35 # - mark a and b with different sides
36 36 # - if a parent's children are all on the same side, the parent is
37 37 # on that side, otherwise it is on no side
38 38 # - walk the graph in topological order with the help of a heap;
39 39 # - add unseen parents to side map
40 40 # - clear side of any parent that has children on different sides
41 41 # - track number of interesting revs that might still be on a side
42 42 # - track the lowest interesting rev seen
43 43 # - quit when interesting revs is zero
44 44
45 45 cl = repo.changelog
46 46 working = len(cl) # pseudo rev for the working directory
47 47 if a is None:
48 48 a = working
49 49 if b is None:
50 50 b = working
51 51
52 52 side = {a: -1, b: 1}
53 53 visit = [-a, -b]
54 54 heapq.heapify(visit)
55 55 interesting = len(visit)
56 56 hascommonancestor = False
57 57 limit = working
58 58
59 59 while interesting:
60 60 r = -heapq.heappop(visit)
61 61 if r == working:
62 62 parents = [cl.rev(p) for p in repo.dirstate.parents()]
63 63 else:
64 64 parents = cl.parentrevs(r)
65 65 for p in parents:
66 66 if p < 0:
67 67 continue
68 68 if p not in side:
69 69 # first time we see p; add it to visit
70 70 side[p] = side[r]
71 71 if side[p]:
72 72 interesting += 1
73 73 heapq.heappush(visit, -p)
74 74 elif side[p] and side[p] != side[r]:
75 75 # p was interesting but now we know better
76 76 side[p] = 0
77 77 interesting -= 1
78 78 hascommonancestor = True
79 79 if side[r]:
80 80 limit = r # lowest rev visited
81 81 interesting -= 1
82 82
83 83 if not hascommonancestor:
84 84 return None
85 85 return limit
86 86
87 87 def _chain(src, dst, a, b):
88 88 '''chain two sets of copies a->b'''
89 89 t = a.copy()
90 90 for k, v in b.iteritems():
91 91 if v in t:
92 92 # found a chain
93 93 if t[v] != k:
94 94 # file wasn't renamed back to itself
95 95 t[k] = t[v]
96 96 if v not in dst:
97 97 # chain was a rename, not a copy
98 98 del t[v]
99 99 if v in src:
100 100 # file is a copy of an existing file
101 101 t[k] = v
102 102
103 103 # remove criss-crossed copies
104 104 for k, v in t.items():
105 105 if k in src and v in dst:
106 106 del t[k]
107 107
108 108 return t
109 109
110 110 def _tracefile(fctx, actx):
111 111 '''return file context that is the ancestor of fctx present in actx'''
112 112 stop = actx.rev()
113 113 am = actx.manifest()
114 114
115 115 for f in fctx.ancestors():
116 116 if am.get(f.path(), None) == f.filenode():
117 117 return f
118 118 if f.rev() < stop:
119 119 return None
120 120
121 121 def _dirstatecopies(d):
122 122 ds = d._repo.dirstate
123 123 c = ds.copies().copy()
124 124 for k in c.keys():
125 125 if ds[k] not in 'anm':
126 126 del c[k]
127 127 return c
128 128
129 129 def _forwardcopies(a, b):
130 130 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
131 131
132 132 # check for working copy
133 133 w = None
134 134 if b.rev() is None:
135 135 w = b
136 136 b = w.p1()
137 137 if a == b:
138 138 # short-circuit to avoid issues with merge states
139 139 return _dirstatecopies(w)
140 140
141 141 # find where new files came from
142 142 # we currently don't try to find where old files went, too expensive
143 143 # this means we can miss a case like 'hg rm b; hg cp a b'
144 144 cm = {}
145 145 for f in b:
146 146 if f not in a:
147 147 ofctx = _tracefile(b[f], a)
148 148 if ofctx:
149 149 cm[f] = ofctx.path()
150 150
151 151 # combine copies from dirstate if necessary
152 152 if w is not None:
153 153 cm = _chain(a, w, cm, _dirstatecopies(w))
154 154
155 155 return cm
156 156
157 157 def _backwardcopies(a, b):
158 158 # because the forward mapping is 1:n, we can lose renames here
159 159 # in particular, we find renames better than copies
160 160 f = _forwardcopies(b, a)
161 161 r = {}
162 162 for k, v in f.iteritems():
163 163 r[v] = k
164 164 return r
165 165
166 166 def pathcopies(x, y):
167 167 '''find {dst@y: src@x} copy mapping for directed compare'''
168 168 if x == y or not x or not y:
169 169 return {}
170 170 a = y.ancestor(x)
171 171 if a == x:
172 172 return _forwardcopies(x, y)
173 173 if a == y:
174 174 return _backwardcopies(x, y)
175 175 return _chain(x, y, _backwardcopies(x, a), _forwardcopies(a, y))
176 176
177 177 def mergecopies(repo, c1, c2, ca):
178 178 """
179 179 Find moves and copies between context c1 and c2 that are relevant
180 180 for merging.
181 181
182 182 Returns two dicts, "copy" and "diverge".
183 183
184 "copy" is a mapping from source name -> destination name,
184 "copy" is a mapping from destination name -> source name,
185 185 where source is in c1 and destination is in c2 or vice-versa.
186 186
187 187 "diverge" is a mapping of source name -> list of destination names
188 188 for divergent renames.
189 189 """
190 190 # avoid silly behavior for update from empty dir
191 191 if not c1 or not c2 or c1 == c2:
192 192 return {}, {}
193 193
194 194 # avoid silly behavior for parent -> working dir
195 195 if c2.node() is None and c1.node() == repo.dirstate.p1():
196 196 return repo.dirstate.copies(), {}
197 197
198 198 limit = _findlimit(repo, c1.rev(), c2.rev())
199 199 if limit is None:
200 200 # no common ancestor, no copies
201 201 return {}, {}
202 202 m1 = c1.manifest()
203 203 m2 = c2.manifest()
204 204 ma = ca.manifest()
205 205
206 206 def makectx(f, n):
207 207 if len(n) != 20: # in a working context?
208 208 if c1.rev() is None:
209 209 return c1.filectx(f)
210 210 return c2.filectx(f)
211 211 return repo.filectx(f, fileid=n)
212 212
213 213 ctx = util.lrucachefunc(makectx)
214 214 copy = {}
215 215 fullcopy = {}
216 216 diverge = {}
217 217
218 218 def related(f1, f2, limit):
219 219 # Walk back to common ancestor to see if the two files originate
220 220 # from the same file. Since workingfilectx's rev() is None it messes
221 221 # up the integer comparison logic, hence the pre-step check for
222 222 # None (f1 and f2 can only be workingfilectx's initially).
223 223
224 224 if f1 == f2:
225 225 return f1 # a match
226 226
227 227 g1, g2 = f1.ancestors(), f2.ancestors()
228 228 try:
229 229 f1r, f2r = f1.rev(), f2.rev()
230 230
231 231 if f1r is None:
232 232 f1 = g1.next()
233 233 if f2r is None:
234 234 f2 = g2.next()
235 235
236 236 while True:
237 237 f1r, f2r = f1.rev(), f2.rev()
238 238 if f1r > f2r:
239 239 f1 = g1.next()
240 240 elif f2r > f1r:
241 241 f2 = g2.next()
242 242 elif f1 == f2:
243 243 return f1 # a match
244 244 elif f1r == f2r or f1r < limit or f2r < limit:
245 245 return False # copy no longer relevant
246 246 except StopIteration:
247 247 return False
248 248
249 249 def checkcopies(f, m1, m2):
250 250 '''check possible copies of f from m1 to m2'''
251 251 of = None
252 252 seen = set([f])
253 253 for oc in ctx(f, m1[f]).ancestors():
254 254 ocr = oc.rev()
255 255 of = oc.path()
256 256 if of in seen:
257 257 # check limit late - grab last rename before
258 258 if ocr < limit:
259 259 break
260 260 continue
261 261 seen.add(of)
262 262
263 263 fullcopy[f] = of # remember for dir rename detection
264 264 if of not in m2:
265 265 continue # no match, keep looking
266 266 if m2[of] == ma.get(of):
267 267 break # no merge needed, quit early
268 268 c2 = ctx(of, m2[of])
269 269 cr = related(oc, c2, ca.rev())
270 270 if cr and (of == f or of == c2.path()): # non-divergent
271 271 copy[f] = of
272 272 of = None
273 273 break
274 274
275 275 if of in ma:
276 276 diverge.setdefault(of, []).append(f)
277 277
278 278 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
279 279
280 280 u1 = _nonoverlap(m1, m2, ma)
281 281 u2 = _nonoverlap(m2, m1, ma)
282 282
283 283 if u1:
284 284 repo.ui.debug(" unmatched files in local:\n %s\n"
285 285 % "\n ".join(u1))
286 286 if u2:
287 287 repo.ui.debug(" unmatched files in other:\n %s\n"
288 288 % "\n ".join(u2))
289 289
290 290 for f in u1:
291 291 checkcopies(f, m1, m2)
292 292 for f in u2:
293 293 checkcopies(f, m2, m1)
294 294
295 295 diverge2 = set()
296 296 for of, fl in diverge.items():
297 297 if len(fl) == 1 or of in c2:
298 298 del diverge[of] # not actually divergent, or not a rename
299 299 else:
300 300 diverge2.update(fl) # reverse map for below
301 301
302 302 if fullcopy:
303 303 repo.ui.debug(" all copies found (* = to merge, ! = divergent):\n")
304 304 for f in fullcopy:
305 305 note = ""
306 306 if f in copy:
307 307 note += "*"
308 308 if f in diverge2:
309 309 note += "!"
310 310 repo.ui.debug(" %s -> %s %s\n" % (f, fullcopy[f], note))
311 311 del diverge2
312 312
313 313 if not fullcopy:
314 314 return copy, diverge
315 315
316 316 repo.ui.debug(" checking for directory renames\n")
317 317
318 318 # generate a directory move map
319 319 d1, d2 = _dirs(m1), _dirs(m2)
320 320 invalid = set()
321 321 dirmove = {}
322 322
323 323 # examine each file copy for a potential directory move, which is
324 324 # when all the files in a directory are moved to a new directory
325 325 for dst, src in fullcopy.iteritems():
326 326 dsrc, ddst = _dirname(src), _dirname(dst)
327 327 if dsrc in invalid:
328 328 # already seen to be uninteresting
329 329 continue
330 330 elif dsrc in d1 and ddst in d1:
331 331 # directory wasn't entirely moved locally
332 332 invalid.add(dsrc)
333 333 elif dsrc in d2 and ddst in d2:
334 334 # directory wasn't entirely moved remotely
335 335 invalid.add(dsrc)
336 336 elif dsrc in dirmove and dirmove[dsrc] != ddst:
337 337 # files from the same directory moved to two different places
338 338 invalid.add(dsrc)
339 339 else:
340 340 # looks good so far
341 341 dirmove[dsrc + "/"] = ddst + "/"
342 342
343 343 for i in invalid:
344 344 if i in dirmove:
345 345 del dirmove[i]
346 346 del d1, d2, invalid
347 347
348 348 if not dirmove:
349 349 return copy, diverge
350 350
351 351 for d in dirmove:
352 352 repo.ui.debug(" dir %s -> %s\n" % (d, dirmove[d]))
353 353
354 354 # check unaccounted nonoverlapping files against directory moves
355 355 for f in u1 + u2:
356 356 if f not in fullcopy:
357 357 for d in dirmove:
358 358 if f.startswith(d):
359 359 # new file added in a directory that was moved, move it
360 360 df = dirmove[d] + f[len(d):]
361 361 if df not in copy:
362 362 copy[f] = df
363 363 repo.ui.debug(" file %s -> %s\n" % (f, copy[f]))
364 364 break
365 365
366 366 return copy, diverge
General Comments 0
You need to be logged in to leave comments. Login now