##// END OF EJS Templates
copies._forwardcopies: use set operations to find missing files...
Siddharth Agarwal -
r18878:3cfaace0 default
parent child Browse files
Show More
@@ -1,386 +1,388 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 11 def _nonoverlap(d1, d2, d3):
12 12 "Return list of elements in d1 not in d2 or d3"
13 13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14 14
15 15 def _dirname(f):
16 16 s = f.rfind("/")
17 17 if s == -1:
18 18 return ""
19 19 return f[:s]
20 20
21 21 def _findlimit(repo, a, b):
22 22 """Find the earliest revision that's an ancestor of a or b but not both,
23 23 None if no such revision exists.
24 24 """
25 25 # basic idea:
26 26 # - mark a and b with different sides
27 27 # - if a parent's children are all on the same side, the parent is
28 28 # on that side, otherwise it is on no side
29 29 # - walk the graph in topological order with the help of a heap;
30 30 # - add unseen parents to side map
31 31 # - clear side of any parent that has children on different sides
32 32 # - track number of interesting revs that might still be on a side
33 33 # - track the lowest interesting rev seen
34 34 # - quit when interesting revs is zero
35 35
36 36 cl = repo.changelog
37 37 working = len(cl) # pseudo rev for the working directory
38 38 if a is None:
39 39 a = working
40 40 if b is None:
41 41 b = working
42 42
43 43 side = {a: -1, b: 1}
44 44 visit = [-a, -b]
45 45 heapq.heapify(visit)
46 46 interesting = len(visit)
47 47 hascommonancestor = False
48 48 limit = working
49 49
50 50 while interesting:
51 51 r = -heapq.heappop(visit)
52 52 if r == working:
53 53 parents = [cl.rev(p) for p in repo.dirstate.parents()]
54 54 else:
55 55 parents = cl.parentrevs(r)
56 56 for p in parents:
57 57 if p < 0:
58 58 continue
59 59 if p not in side:
60 60 # first time we see p; add it to visit
61 61 side[p] = side[r]
62 62 if side[p]:
63 63 interesting += 1
64 64 heapq.heappush(visit, -p)
65 65 elif side[p] and side[p] != side[r]:
66 66 # p was interesting but now we know better
67 67 side[p] = 0
68 68 interesting -= 1
69 69 hascommonancestor = True
70 70 if side[r]:
71 71 limit = r # lowest rev visited
72 72 interesting -= 1
73 73
74 74 if not hascommonancestor:
75 75 return None
76 76 return limit
77 77
78 78 def _chain(src, dst, a, b):
79 79 '''chain two sets of copies a->b'''
80 80 t = a.copy()
81 81 for k, v in b.iteritems():
82 82 if v in t:
83 83 # found a chain
84 84 if t[v] != k:
85 85 # file wasn't renamed back to itself
86 86 t[k] = t[v]
87 87 if v not in dst:
88 88 # chain was a rename, not a copy
89 89 del t[v]
90 90 if v in src:
91 91 # file is a copy of an existing file
92 92 t[k] = v
93 93
94 94 # remove criss-crossed copies
95 95 for k, v in t.items():
96 96 if k in src and v in dst:
97 97 del t[k]
98 98
99 99 return t
100 100
101 101 def _tracefile(fctx, actx):
102 102 '''return file context that is the ancestor of fctx present in actx'''
103 103 stop = actx.rev()
104 104 am = actx.manifest()
105 105
106 106 for f in fctx.ancestors():
107 107 if am.get(f.path(), None) == f.filenode():
108 108 return f
109 109 if f.rev() < stop:
110 110 return None
111 111
112 112 def _dirstatecopies(d):
113 113 ds = d._repo.dirstate
114 114 c = ds.copies().copy()
115 115 for k in c.keys():
116 116 if ds[k] not in 'anm':
117 117 del c[k]
118 118 return c
119 119
120 120 def _forwardcopies(a, b):
121 121 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
122 122
123 123 # check for working copy
124 124 w = None
125 125 if b.rev() is None:
126 126 w = b
127 127 b = w.p1()
128 128 if a == b:
129 129 # short-circuit to avoid issues with merge states
130 130 return _dirstatecopies(w)
131 131
132 132 # find where new files came from
133 133 # we currently don't try to find where old files went, too expensive
134 134 # this means we can miss a case like 'hg rm b; hg cp a b'
135 135 cm = {}
136 for f in b:
137 if f not in a:
136 missing = set(b.manifest().iterkeys())
137 missing.difference_update(a.manifest().iterkeys())
138
139 for f in missing:
138 140 ofctx = _tracefile(b[f], a)
139 141 if ofctx:
140 142 cm[f] = ofctx.path()
141 143
142 144 # combine copies from dirstate if necessary
143 145 if w is not None:
144 146 cm = _chain(a, w, cm, _dirstatecopies(w))
145 147
146 148 return cm
147 149
148 150 def _backwardrenames(a, b):
149 151 # Even though we're not taking copies into account, 1:n rename situations
150 152 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
151 153 # arbitrarily pick one of the renames.
152 154 f = _forwardcopies(b, a)
153 155 r = {}
154 156 for k, v in sorted(f.iteritems()):
155 157 # remove copies
156 158 if v in a:
157 159 continue
158 160 r[v] = k
159 161 return r
160 162
161 163 def pathcopies(x, y):
162 164 '''find {dst@y: src@x} copy mapping for directed compare'''
163 165 if x == y or not x or not y:
164 166 return {}
165 167 a = y.ancestor(x)
166 168 if a == x:
167 169 return _forwardcopies(x, y)
168 170 if a == y:
169 171 return _backwardrenames(x, y)
170 172 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
171 173
172 174 def mergecopies(repo, c1, c2, ca):
173 175 """
174 176 Find moves and copies between context c1 and c2 that are relevant
175 177 for merging.
176 178
177 179 Returns four dicts: "copy", "movewithdir", "diverge", and
178 180 "renamedelete".
179 181
180 182 "copy" is a mapping from destination name -> source name,
181 183 where source is in c1 and destination is in c2 or vice-versa.
182 184
183 185 "movewithdir" is a mapping from source name -> destination name,
184 186 where the file at source present in one context but not the other
185 187 needs to be moved to destination by the merge process, because the
186 188 other context moved the directory it is in.
187 189
188 190 "diverge" is a mapping of source name -> list of destination names
189 191 for divergent renames.
190 192
191 193 "renamedelete" is a mapping of source name -> list of destination
192 194 names for files deleted in c1 that were renamed in c2 or vice-versa.
193 195 """
194 196 # avoid silly behavior for update from empty dir
195 197 if not c1 or not c2 or c1 == c2:
196 198 return {}, {}, {}, {}
197 199
198 200 # avoid silly behavior for parent -> working dir
199 201 if c2.node() is None and c1.node() == repo.dirstate.p1():
200 202 return repo.dirstate.copies(), {}, {}, {}
201 203
202 204 limit = _findlimit(repo, c1.rev(), c2.rev())
203 205 if limit is None:
204 206 # no common ancestor, no copies
205 207 return {}, {}, {}, {}
206 208 m1 = c1.manifest()
207 209 m2 = c2.manifest()
208 210 ma = ca.manifest()
209 211
210 212 def makectx(f, n):
211 213 if len(n) != 20: # in a working context?
212 214 if c1.rev() is None:
213 215 return c1.filectx(f)
214 216 return c2.filectx(f)
215 217 return repo.filectx(f, fileid=n)
216 218
217 219 ctx = util.lrucachefunc(makectx)
218 220 copy = {}
219 221 movewithdir = {}
220 222 fullcopy = {}
221 223 diverge = {}
222 224
223 225 def related(f1, f2, limit):
224 226 # Walk back to common ancestor to see if the two files originate
225 227 # from the same file. Since workingfilectx's rev() is None it messes
226 228 # up the integer comparison logic, hence the pre-step check for
227 229 # None (f1 and f2 can only be workingfilectx's initially).
228 230
229 231 if f1 == f2:
230 232 return f1 # a match
231 233
232 234 g1, g2 = f1.ancestors(), f2.ancestors()
233 235 try:
234 236 f1r, f2r = f1.rev(), f2.rev()
235 237
236 238 if f1r is None:
237 239 f1 = g1.next()
238 240 if f2r is None:
239 241 f2 = g2.next()
240 242
241 243 while True:
242 244 f1r, f2r = f1.rev(), f2.rev()
243 245 if f1r > f2r:
244 246 f1 = g1.next()
245 247 elif f2r > f1r:
246 248 f2 = g2.next()
247 249 elif f1 == f2:
248 250 return f1 # a match
249 251 elif f1r == f2r or f1r < limit or f2r < limit:
250 252 return False # copy no longer relevant
251 253 except StopIteration:
252 254 return False
253 255
254 256 def checkcopies(f, m1, m2):
255 257 '''check possible copies of f from m1 to m2'''
256 258 of = None
257 259 seen = set([f])
258 260 for oc in ctx(f, m1[f]).ancestors():
259 261 ocr = oc.rev()
260 262 of = oc.path()
261 263 if of in seen:
262 264 # check limit late - grab last rename before
263 265 if ocr < limit:
264 266 break
265 267 continue
266 268 seen.add(of)
267 269
268 270 fullcopy[f] = of # remember for dir rename detection
269 271 if of not in m2:
270 272 continue # no match, keep looking
271 273 if m2[of] == ma.get(of):
272 274 break # no merge needed, quit early
273 275 c2 = ctx(of, m2[of])
274 276 cr = related(oc, c2, ca.rev())
275 277 if cr and (of == f or of == c2.path()): # non-divergent
276 278 copy[f] = of
277 279 of = None
278 280 break
279 281
280 282 if of in ma:
281 283 diverge.setdefault(of, []).append(f)
282 284
283 285 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
284 286
285 287 u1 = _nonoverlap(m1, m2, ma)
286 288 u2 = _nonoverlap(m2, m1, ma)
287 289
288 290 if u1:
289 291 repo.ui.debug(" unmatched files in local:\n %s\n"
290 292 % "\n ".join(u1))
291 293 if u2:
292 294 repo.ui.debug(" unmatched files in other:\n %s\n"
293 295 % "\n ".join(u2))
294 296
295 297 for f in u1:
296 298 checkcopies(f, m1, m2)
297 299 for f in u2:
298 300 checkcopies(f, m2, m1)
299 301
300 302 renamedelete = {}
301 303 renamedelete2 = set()
302 304 diverge2 = set()
303 305 for of, fl in diverge.items():
304 306 if len(fl) == 1 or of in c1 or of in c2:
305 307 del diverge[of] # not actually divergent, or not a rename
306 308 if of not in c1 and of not in c2:
307 309 # renamed on one side, deleted on the other side, but filter
308 310 # out files that have been renamed and then deleted
309 311 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
310 312 renamedelete2.update(fl) # reverse map for below
311 313 else:
312 314 diverge2.update(fl) # reverse map for below
313 315
314 316 if fullcopy:
315 317 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
316 318 "% = renamed and deleted):\n")
317 319 for f in sorted(fullcopy):
318 320 note = ""
319 321 if f in copy:
320 322 note += "*"
321 323 if f in diverge2:
322 324 note += "!"
323 325 if f in renamedelete2:
324 326 note += "%"
325 327 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
326 328 note))
327 329 del diverge2
328 330
329 331 if not fullcopy:
330 332 return copy, movewithdir, diverge, renamedelete
331 333
332 334 repo.ui.debug(" checking for directory renames\n")
333 335
334 336 # generate a directory move map
335 337 d1, d2 = c1.dirs(), c2.dirs()
336 338 d1.add('')
337 339 d2.add('')
338 340 invalid = set()
339 341 dirmove = {}
340 342
341 343 # examine each file copy for a potential directory move, which is
342 344 # when all the files in a directory are moved to a new directory
343 345 for dst, src in fullcopy.iteritems():
344 346 dsrc, ddst = _dirname(src), _dirname(dst)
345 347 if dsrc in invalid:
346 348 # already seen to be uninteresting
347 349 continue
348 350 elif dsrc in d1 and ddst in d1:
349 351 # directory wasn't entirely moved locally
350 352 invalid.add(dsrc)
351 353 elif dsrc in d2 and ddst in d2:
352 354 # directory wasn't entirely moved remotely
353 355 invalid.add(dsrc)
354 356 elif dsrc in dirmove and dirmove[dsrc] != ddst:
355 357 # files from the same directory moved to two different places
356 358 invalid.add(dsrc)
357 359 else:
358 360 # looks good so far
359 361 dirmove[dsrc + "/"] = ddst + "/"
360 362
361 363 for i in invalid:
362 364 if i in dirmove:
363 365 del dirmove[i]
364 366 del d1, d2, invalid
365 367
366 368 if not dirmove:
367 369 return copy, movewithdir, diverge, renamedelete
368 370
369 371 for d in dirmove:
370 372 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
371 373 (d, dirmove[d]))
372 374
373 375 # check unaccounted nonoverlapping files against directory moves
374 376 for f in u1 + u2:
375 377 if f not in fullcopy:
376 378 for d in dirmove:
377 379 if f.startswith(d):
378 380 # new file added in a directory that was moved, move it
379 381 df = dirmove[d] + f[len(d):]
380 382 if df not in copy:
381 383 movewithdir[f] = df
382 384 repo.ui.debug((" pending file src: '%s' -> "
383 385 "dst: '%s'\n") % (f, df))
384 386 break
385 387
386 388 return copy, movewithdir, diverge, renamedelete
General Comments 0
You need to be logged in to leave comments. Login now