##// END OF EJS Templates
copies: replace _nonoverlap() by calls to manifestdict.filesnotin()...
Martin von Zweigbergk -
r24185:3a3806fe default
parent child Browse files
Show More
@@ -1,485 +1,483
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import util
9 9 import heapq
10 10
11 def _nonoverlap(d1, d2, d3):
12 "Return list of elements in d1 not in d2 or d3"
13 return sorted([d for d in d1 if d not in d3 and d not in d2])
14
15 11 def _dirname(f):
16 12 s = f.rfind("/")
17 13 if s == -1:
18 14 return ""
19 15 return f[:s]
20 16
21 17 def _findlimit(repo, a, b):
22 18 """
23 19 Find the last revision that needs to be checked to ensure that a full
24 20 transitive closure for file copies can be properly calculated.
25 21 Generally, this means finding the earliest revision number that's an
26 22 ancestor of a or b but not both, except when a or b is a direct descendent
27 23 of the other, in which case we can return the minimum revnum of a and b.
28 24 None if no such revision exists.
29 25 """
30 26
31 27 # basic idea:
32 28 # - mark a and b with different sides
33 29 # - if a parent's children are all on the same side, the parent is
34 30 # on that side, otherwise it is on no side
35 31 # - walk the graph in topological order with the help of a heap;
36 32 # - add unseen parents to side map
37 33 # - clear side of any parent that has children on different sides
38 34 # - track number of interesting revs that might still be on a side
39 35 # - track the lowest interesting rev seen
40 36 # - quit when interesting revs is zero
41 37
42 38 cl = repo.changelog
43 39 working = len(cl) # pseudo rev for the working directory
44 40 if a is None:
45 41 a = working
46 42 if b is None:
47 43 b = working
48 44
49 45 side = {a: -1, b: 1}
50 46 visit = [-a, -b]
51 47 heapq.heapify(visit)
52 48 interesting = len(visit)
53 49 hascommonancestor = False
54 50 limit = working
55 51
56 52 while interesting:
57 53 r = -heapq.heappop(visit)
58 54 if r == working:
59 55 parents = [cl.rev(p) for p in repo.dirstate.parents()]
60 56 else:
61 57 parents = cl.parentrevs(r)
62 58 for p in parents:
63 59 if p < 0:
64 60 continue
65 61 if p not in side:
66 62 # first time we see p; add it to visit
67 63 side[p] = side[r]
68 64 if side[p]:
69 65 interesting += 1
70 66 heapq.heappush(visit, -p)
71 67 elif side[p] and side[p] != side[r]:
72 68 # p was interesting but now we know better
73 69 side[p] = 0
74 70 interesting -= 1
75 71 hascommonancestor = True
76 72 if side[r]:
77 73 limit = r # lowest rev visited
78 74 interesting -= 1
79 75
80 76 if not hascommonancestor:
81 77 return None
82 78
83 79 # Consider the following flow (see test-commit-amend.t under issue4405):
84 80 # 1/ File 'a0' committed
85 81 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
86 82 # 3/ Move back to first commit
87 83 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
88 84 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
89 85 #
90 86 # During the amend in step five, we will be in this state:
91 87 #
92 88 # @ 3 temporary amend commit for a1-amend
93 89 # |
94 90 # o 2 a1-amend
95 91 # |
96 92 # | o 1 a1
97 93 # |/
98 94 # o 0 a0
99 95 #
100 96 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
101 97 # yet the filelog has the copy information in rev 1 and we will not look
102 98 # back far enough unless we also look at the a and b as candidates.
103 99 # This only occurs when a is a descendent of b or visa-versa.
104 100 return min(limit, a, b)
105 101
106 102 def _chain(src, dst, a, b):
107 103 '''chain two sets of copies a->b'''
108 104 t = a.copy()
109 105 for k, v in b.iteritems():
110 106 if v in t:
111 107 # found a chain
112 108 if t[v] != k:
113 109 # file wasn't renamed back to itself
114 110 t[k] = t[v]
115 111 if v not in dst:
116 112 # chain was a rename, not a copy
117 113 del t[v]
118 114 if v in src:
119 115 # file is a copy of an existing file
120 116 t[k] = v
121 117
122 118 # remove criss-crossed copies
123 119 for k, v in t.items():
124 120 if k in src and v in dst:
125 121 del t[k]
126 122
127 123 return t
128 124
129 125 def _tracefile(fctx, am, limit=-1):
130 126 '''return file context that is the ancestor of fctx present in ancestor
131 127 manifest am, stopping after the first ancestor lower than limit'''
132 128
133 129 for f in fctx.ancestors():
134 130 if am.get(f.path(), None) == f.filenode():
135 131 return f
136 132 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
137 133 return None
138 134
139 135 def _dirstatecopies(d):
140 136 ds = d._repo.dirstate
141 137 c = ds.copies().copy()
142 138 for k in c.keys():
143 139 if ds[k] not in 'anm':
144 140 del c[k]
145 141 return c
146 142
147 143 def _computeforwardmissing(a, b):
148 144 """Computes which files are in b but not a.
149 145 This is its own function so extensions can easily wrap this call to see what
150 146 files _forwardcopies is about to process.
151 147 """
152 148 return b.manifest().filesnotin(a.manifest())
153 149
154 150 def _forwardcopies(a, b):
155 151 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
156 152
157 153 # check for working copy
158 154 w = None
159 155 if b.rev() is None:
160 156 w = b
161 157 b = w.p1()
162 158 if a == b:
163 159 # short-circuit to avoid issues with merge states
164 160 return _dirstatecopies(w)
165 161
166 162 # files might have to be traced back to the fctx parent of the last
167 163 # one-side-only changeset, but not further back than that
168 164 limit = _findlimit(a._repo, a.rev(), b.rev())
169 165 if limit is None:
170 166 limit = -1
171 167 am = a.manifest()
172 168
173 169 # find where new files came from
174 170 # we currently don't try to find where old files went, too expensive
175 171 # this means we can miss a case like 'hg rm b; hg cp a b'
176 172 cm = {}
177 173 missing = _computeforwardmissing(a, b)
178 174 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
179 175 for f in missing:
180 176 fctx = b[f]
181 177 fctx._ancestrycontext = ancestrycontext
182 178 ofctx = _tracefile(fctx, am, limit)
183 179 if ofctx:
184 180 cm[f] = ofctx.path()
185 181
186 182 # combine copies from dirstate if necessary
187 183 if w is not None:
188 184 cm = _chain(a, w, cm, _dirstatecopies(w))
189 185
190 186 return cm
191 187
192 188 def _backwardrenames(a, b):
193 189 # Even though we're not taking copies into account, 1:n rename situations
194 190 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
195 191 # arbitrarily pick one of the renames.
196 192 f = _forwardcopies(b, a)
197 193 r = {}
198 194 for k, v in sorted(f.iteritems()):
199 195 # remove copies
200 196 if v in a:
201 197 continue
202 198 r[v] = k
203 199 return r
204 200
205 201 def pathcopies(x, y):
206 202 '''find {dst@y: src@x} copy mapping for directed compare'''
207 203 if x == y or not x or not y:
208 204 return {}
209 205 a = y.ancestor(x)
210 206 if a == x:
211 207 return _forwardcopies(x, y)
212 208 if a == y:
213 209 return _backwardrenames(x, y)
214 210 return _chain(x, y, _backwardrenames(x, a), _forwardcopies(a, y))
215 211
216 212 def _computenonoverlap(repo, m1, m2, ma):
217 213 """Computes the files exclusive to m1 and m2.
218 214 This is its own function so extensions can easily wrap this call to see what
219 215 files mergecopies is about to process.
220 216 """
221 u1 = _nonoverlap(m1, m2, ma)
222 u2 = _nonoverlap(m2, m1, ma)
217 addedinm1 = m1.filesnotin(ma)
218 addedinm2 = m2.filesnotin(ma)
219 u1 = sorted(addedinm1 - addedinm2)
220 u2 = sorted(addedinm2 - addedinm1)
223 221
224 222 if u1:
225 223 repo.ui.debug(" unmatched files in local:\n %s\n"
226 224 % "\n ".join(u1))
227 225 if u2:
228 226 repo.ui.debug(" unmatched files in other:\n %s\n"
229 227 % "\n ".join(u2))
230 228 return u1, u2
231 229
232 230 def mergecopies(repo, c1, c2, ca):
233 231 """
234 232 Find moves and copies between context c1 and c2 that are relevant
235 233 for merging.
236 234
237 235 Returns four dicts: "copy", "movewithdir", "diverge", and
238 236 "renamedelete".
239 237
240 238 "copy" is a mapping from destination name -> source name,
241 239 where source is in c1 and destination is in c2 or vice-versa.
242 240
243 241 "movewithdir" is a mapping from source name -> destination name,
244 242 where the file at source present in one context but not the other
245 243 needs to be moved to destination by the merge process, because the
246 244 other context moved the directory it is in.
247 245
248 246 "diverge" is a mapping of source name -> list of destination names
249 247 for divergent renames.
250 248
251 249 "renamedelete" is a mapping of source name -> list of destination
252 250 names for files deleted in c1 that were renamed in c2 or vice-versa.
253 251 """
254 252 # avoid silly behavior for update from empty dir
255 253 if not c1 or not c2 or c1 == c2:
256 254 return {}, {}, {}, {}
257 255
258 256 # avoid silly behavior for parent -> working dir
259 257 if c2.node() is None and c1.node() == repo.dirstate.p1():
260 258 return repo.dirstate.copies(), {}, {}, {}
261 259
262 260 limit = _findlimit(repo, c1.rev(), c2.rev())
263 261 if limit is None:
264 262 # no common ancestor, no copies
265 263 return {}, {}, {}, {}
266 264 m1 = c1.manifest()
267 265 m2 = c2.manifest()
268 266 ma = ca.manifest()
269 267
270 268 def makectx(f, n):
271 269 if len(n) != 20: # in a working context?
272 270 if c1.rev() is None:
273 271 return c1.filectx(f)
274 272 return c2.filectx(f)
275 273 return repo.filectx(f, fileid=n)
276 274
277 275 ctx = util.lrucachefunc(makectx)
278 276 copy = {}
279 277 movewithdir = {}
280 278 fullcopy = {}
281 279 diverge = {}
282 280
283 281 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
284 282
285 283 u1, u2 = _computenonoverlap(repo, m1, m2, ma)
286 284
287 285 for f in u1:
288 286 checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy)
289 287
290 288 for f in u2:
291 289 checkcopies(ctx, f, m2, m1, ca, limit, diverge, copy, fullcopy)
292 290
293 291 renamedelete = {}
294 292 renamedelete2 = set()
295 293 diverge2 = set()
296 294 for of, fl in diverge.items():
297 295 if len(fl) == 1 or of in c1 or of in c2:
298 296 del diverge[of] # not actually divergent, or not a rename
299 297 if of not in c1 and of not in c2:
300 298 # renamed on one side, deleted on the other side, but filter
301 299 # out files that have been renamed and then deleted
302 300 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
303 301 renamedelete2.update(fl) # reverse map for below
304 302 else:
305 303 diverge2.update(fl) # reverse map for below
306 304
307 305 bothnew = sorted([d for d in m1 if d in m2 and d not in ma])
308 306 if bothnew:
309 307 repo.ui.debug(" unmatched files new in both:\n %s\n"
310 308 % "\n ".join(bothnew))
311 309 bothdiverge, _copy, _fullcopy = {}, {}, {}
312 310 for f in bothnew:
313 311 checkcopies(ctx, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
314 312 checkcopies(ctx, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
315 313 for of, fl in bothdiverge.items():
316 314 if len(fl) == 2 and fl[0] == fl[1]:
317 315 copy[fl[0]] = of # not actually divergent, just matching renames
318 316
319 317 if fullcopy and repo.ui.debugflag:
320 318 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
321 319 "% = renamed and deleted):\n")
322 320 for f in sorted(fullcopy):
323 321 note = ""
324 322 if f in copy:
325 323 note += "*"
326 324 if f in diverge2:
327 325 note += "!"
328 326 if f in renamedelete2:
329 327 note += "%"
330 328 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
331 329 note))
332 330 del diverge2
333 331
334 332 if not fullcopy:
335 333 return copy, movewithdir, diverge, renamedelete
336 334
337 335 repo.ui.debug(" checking for directory renames\n")
338 336
339 337 # generate a directory move map
340 338 d1, d2 = c1.dirs(), c2.dirs()
341 339 d1.addpath('/')
342 340 d2.addpath('/')
343 341 invalid = set()
344 342 dirmove = {}
345 343
346 344 # examine each file copy for a potential directory move, which is
347 345 # when all the files in a directory are moved to a new directory
348 346 for dst, src in fullcopy.iteritems():
349 347 dsrc, ddst = _dirname(src), _dirname(dst)
350 348 if dsrc in invalid:
351 349 # already seen to be uninteresting
352 350 continue
353 351 elif dsrc in d1 and ddst in d1:
354 352 # directory wasn't entirely moved locally
355 353 invalid.add(dsrc)
356 354 elif dsrc in d2 and ddst in d2:
357 355 # directory wasn't entirely moved remotely
358 356 invalid.add(dsrc)
359 357 elif dsrc in dirmove and dirmove[dsrc] != ddst:
360 358 # files from the same directory moved to two different places
361 359 invalid.add(dsrc)
362 360 else:
363 361 # looks good so far
364 362 dirmove[dsrc + "/"] = ddst + "/"
365 363
366 364 for i in invalid:
367 365 if i in dirmove:
368 366 del dirmove[i]
369 367 del d1, d2, invalid
370 368
371 369 if not dirmove:
372 370 return copy, movewithdir, diverge, renamedelete
373 371
374 372 for d in dirmove:
375 373 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
376 374 (d, dirmove[d]))
377 375
378 376 # check unaccounted nonoverlapping files against directory moves
379 377 for f in u1 + u2:
380 378 if f not in fullcopy:
381 379 for d in dirmove:
382 380 if f.startswith(d):
383 381 # new file added in a directory that was moved, move it
384 382 df = dirmove[d] + f[len(d):]
385 383 if df not in copy:
386 384 movewithdir[f] = df
387 385 repo.ui.debug((" pending file src: '%s' -> "
388 386 "dst: '%s'\n") % (f, df))
389 387 break
390 388
391 389 return copy, movewithdir, diverge, renamedelete
392 390
393 391 def checkcopies(ctx, f, m1, m2, ca, limit, diverge, copy, fullcopy):
394 392 """
395 393 check possible copies of f from m1 to m2
396 394
397 395 ctx = function accepting (filename, node) that returns a filectx.
398 396 f = the filename to check
399 397 m1 = the source manifest
400 398 m2 = the destination manifest
401 399 ca = the changectx of the common ancestor
402 400 limit = the rev number to not search beyond
403 401 diverge = record all diverges in this dict
404 402 copy = record all non-divergent copies in this dict
405 403 fullcopy = record all copies in this dict
406 404 """
407 405
408 406 ma = ca.manifest()
409 407
410 408 def _related(f1, f2, limit):
411 409 # Walk back to common ancestor to see if the two files originate
412 410 # from the same file. Since workingfilectx's rev() is None it messes
413 411 # up the integer comparison logic, hence the pre-step check for
414 412 # None (f1 and f2 can only be workingfilectx's initially).
415 413
416 414 if f1 == f2:
417 415 return f1 # a match
418 416
419 417 g1, g2 = f1.ancestors(), f2.ancestors()
420 418 try:
421 419 f1r, f2r = f1.rev(), f2.rev()
422 420
423 421 if f1r is None:
424 422 f1 = g1.next()
425 423 if f2r is None:
426 424 f2 = g2.next()
427 425
428 426 while True:
429 427 f1r, f2r = f1.rev(), f2.rev()
430 428 if f1r > f2r:
431 429 f1 = g1.next()
432 430 elif f2r > f1r:
433 431 f2 = g2.next()
434 432 elif f1 == f2:
435 433 return f1 # a match
436 434 elif f1r == f2r or f1r < limit or f2r < limit:
437 435 return False # copy no longer relevant
438 436 except StopIteration:
439 437 return False
440 438
441 439 of = None
442 440 seen = set([f])
443 441 for oc in ctx(f, m1[f]).ancestors():
444 442 ocr = oc.rev()
445 443 of = oc.path()
446 444 if of in seen:
447 445 # check limit late - grab last rename before
448 446 if ocr < limit:
449 447 break
450 448 continue
451 449 seen.add(of)
452 450
453 451 fullcopy[f] = of # remember for dir rename detection
454 452 if of not in m2:
455 453 continue # no match, keep looking
456 454 if m2[of] == ma.get(of):
457 455 break # no merge needed, quit early
458 456 c2 = ctx(of, m2[of])
459 457 cr = _related(oc, c2, ca.rev())
460 458 if cr and (of == f or of == c2.path()): # non-divergent
461 459 copy[f] = of
462 460 of = None
463 461 break
464 462
465 463 if of in ma:
466 464 diverge.setdefault(of, []).append(f)
467 465
468 466 def duplicatecopies(repo, rev, fromrev, skiprev=None):
469 467 '''reproduce copies from fromrev to rev in the dirstate
470 468
471 469 If skiprev is specified, it's a revision that should be used to
472 470 filter copy records. Any copies that occur between fromrev and
473 471 skiprev will not be duplicated, even if they appear in the set of
474 472 copies between fromrev and rev.
475 473 '''
476 474 exclude = {}
477 475 if skiprev is not None:
478 476 exclude = pathcopies(repo[fromrev], repo[skiprev])
479 477 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
480 478 # copies.pathcopies returns backward renames, so dst might not
481 479 # actually be in the dirstate
482 480 if dst in exclude:
483 481 continue
484 482 if repo.dirstate[dst] in "nma":
485 483 repo.dirstate.copy(src, dst)
General Comments 0
You need to be logged in to leave comments. Login now