checkcopies: add an inline comment about the '_related' call...
Pierre-Yves David - r30137:f85f9e06 default
@@ -1,559 +1,561 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11
12 12 from . import (
13 13 node,
14 14 pathutil,
15 15 scmutil,
16 16 util,
17 17 )
18 18
19 19 def _findlimit(repo, a, b):
20 20 """
21 21 Find the last revision that needs to be checked to ensure that a full
22 22 transitive closure for file copies can be properly calculated.
23 23 Generally, this means finding the earliest revision number that's an
24 24 ancestor of a or b but not both, except when a or b is a direct descendant
25 25 of the other, in which case we can return the minimum revnum of a and b.
26 26 None if no such revision exists.
27 27 """
28 28
29 29 # basic idea:
30 30 # - mark a and b with different sides
31 31 # - if a parent's children are all on the same side, the parent is
32 32 # on that side, otherwise it is on no side
33 33 # - walk the graph in topological order with the help of a heap;
34 34 # - add unseen parents to side map
35 35 # - clear side of any parent that has children on different sides
36 36 # - track number of interesting revs that might still be on a side
37 37 # - track the lowest interesting rev seen
38 38 # - quit when interesting revs is zero
39 39
40 40 cl = repo.changelog
41 41 working = len(cl) # pseudo rev for the working directory
42 42 if a is None:
43 43 a = working
44 44 if b is None:
45 45 b = working
46 46
47 47 side = {a: -1, b: 1}
48 48 visit = [-a, -b]
49 49 heapq.heapify(visit)
50 50 interesting = len(visit)
51 51 hascommonancestor = False
52 52 limit = working
53 53
54 54 while interesting:
55 55 r = -heapq.heappop(visit)
56 56 if r == working:
57 57 parents = [cl.rev(p) for p in repo.dirstate.parents()]
58 58 else:
59 59 parents = cl.parentrevs(r)
60 60 for p in parents:
61 61 if p < 0:
62 62 continue
63 63 if p not in side:
64 64 # first time we see p; add it to visit
65 65 side[p] = side[r]
66 66 if side[p]:
67 67 interesting += 1
68 68 heapq.heappush(visit, -p)
69 69 elif side[p] and side[p] != side[r]:
70 70 # p was interesting but now we know better
71 71 side[p] = 0
72 72 interesting -= 1
73 73 hascommonancestor = True
74 74 if side[r]:
75 75 limit = r # lowest rev visited
76 76 interesting -= 1
77 77
78 78 if not hascommonancestor:
79 79 return None
80 80
81 81 # Consider the following flow (see test-commit-amend.t under issue4405):
82 82 # 1/ File 'a0' committed
83 83 # 2/ File renamed from 'a0' to 'a1' in a new commit (call it 'a1')
84 84 # 3/ Move back to first commit
85 85 # 4/ Create a new commit via revert to contents of 'a1' (call it 'a1-amend')
86 86 # 5/ Rename file from 'a1' to 'a2' and commit --amend 'a1-msg'
87 87 #
88 88 # During the amend in step five, we will be in this state:
89 89 #
90 90 # @ 3 temporary amend commit for a1-amend
91 91 # |
92 92 # o 2 a1-amend
93 93 # |
94 94 # | o 1 a1
95 95 # |/
96 96 # o 0 a0
97 97 #
98 98 # When _findlimit is called, a and b are revs 3 and 0, so limit will be 2,
99 99 # yet the filelog has the copy information in rev 1 and we will not look
100 100 # back far enough unless we also look at the a and b as candidates.
101 101 # This only occurs when a is a descendant of b or vice-versa.
102 102 return min(limit, a, b)
103 103
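The side-marking walk described in the comments above is easier to follow on a toy graph. The sketch below is an editor's illustration, not part of copies.py: it re-runs the same algorithm over a plain {rev: (p1, p2)} dict, using the small issue4405 graph from the comment above (revs 3 and 2 on one line of history, rev 1 on the other, rev 0 common).

import heapq

def _findlimit_sketch(parentrevs, a, b):
    # same walk as _findlimit, but over a plain {rev: (p1, p2)} map
    side = {a: -1, b: 1}
    visit = [-a, -b]
    heapq.heapify(visit)
    interesting = len(visit)
    hascommonancestor = False
    limit = max(parentrevs) + 1        # stands in for the 'working' pseudo rev
    while interesting:
        r = -heapq.heappop(visit)
        for p in parentrevs[r]:
            if p < 0:
                continue
            if p not in side:
                side[p] = side[r]      # first time we see p
                if side[p]:
                    interesting += 1
                heapq.heappush(visit, -p)
            elif side[p] and side[p] != side[r]:
                side[p] = 0            # p is reachable from both sides
                interesting -= 1
                hascommonancestor = True
        if side[r]:
            limit = r                  # lowest interesting rev visited
            interesting -= 1
    if not hascommonancestor:
        return None
    return min(limit, a, b)

# the issue4405 graph from the comment above: 3 -> 2 -> 0 and 1 -> 0
parents = {0: (-1, -1), 1: (0, -1), 2: (0, -1), 3: (2, -1)}
# the walk alone stops at rev 2, but taking a and b into account gives 0
assert _findlimit_sketch(parents, 3, 0) == 0
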
104 104 def _chain(src, dst, a, b):
105 105 '''chain two sets of copies a->b'''
106 106 t = a.copy()
107 107 for k, v in b.iteritems():
108 108 if v in t:
109 109 # found a chain
110 110 if t[v] != k:
111 111 # file wasn't renamed back to itself
112 112 t[k] = t[v]
113 113 if v not in dst:
114 114 # chain was a rename, not a copy
115 115 del t[v]
116 116 if v in src:
117 117 # file is a copy of an existing file
118 118 t[k] = v
119 119
120 120 # remove criss-crossed copies
121 121 for k, v in t.items():
122 122 if k in src and v in dst:
123 123 del t[k]
124 124
125 125 return t
126 126
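Because _chain only needs membership tests on 'src'/'dst' and plain dict operations on the two copy maps, it can be exercised with ordinary sets and dicts. A minimal sketch, assuming the mercurial package shown here is importable; the file names below are made up:

from mercurial import copies

src = set(['a'])        # files present in the starting context
dst = set(['c'])        # files present in the ending context
first = {'b': 'a'}      # first leg:  'a' was renamed to 'b'
second = {'c': 'b'}     # second leg: 'b' was renamed to 'c'
# chaining yields c <- a; the intermediate 'b' is dropped because it is no
# longer present in dst (it was a rename, not a copy)
assert copies._chain(src, dst, first, second) == {'c': 'a'}
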
127 127 def _tracefile(fctx, am, limit=-1):
128 128 '''return file context that is the ancestor of fctx present in ancestor
129 129 manifest am, stopping after the first ancestor lower than limit'''
130 130
131 131 for f in fctx.ancestors():
132 132 if am.get(f.path(), None) == f.filenode():
133 133 return f
134 134 if limit >= 0 and f.linkrev() < limit and f.rev() < limit:
135 135 return None
136 136
137 137 def _dirstatecopies(d):
138 138 ds = d._repo.dirstate
139 139 c = ds.copies().copy()
140 140 for k in c.keys():
141 141 if ds[k] not in 'anm':
142 142 del c[k]
143 143 return c
144 144
145 145 def _computeforwardmissing(a, b, match=None):
146 146 """Computes which files are in b but not a.
147 147 This is its own function so extensions can easily wrap this call to see what
148 148 files _forwardcopies is about to process.
149 149 """
150 150 ma = a.manifest()
151 151 mb = b.manifest()
152 152 if match:
153 153 ma = ma.matches(match)
154 154 mb = mb.matches(match)
155 155 return mb.filesnotin(ma)
156 156
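As the docstring notes, this helper mainly exists as a wrapping point for extensions. A minimal sketch of such a wrapper, assuming the standard extensions.wrapfunction mechanism; 'myext' is a hypothetical extension name and the wrapper only logs, leaving behaviour unchanged:

from mercurial import copies, extensions

def _logforwardmissing(orig, a, b, match=None):
    # call through to the real _computeforwardmissing, then log the result
    missing = orig(a, b, match=match)
    a._repo.ui.debug("myext: tracing %d missing file(s)\n" % len(missing))
    return missing

def extsetup(ui):
    extensions.wrapfunction(copies, '_computeforwardmissing',
                            _logforwardmissing)
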
157 157 def _forwardcopies(a, b, match=None):
158 158 '''find {dst@b: src@a} copy mapping where a is an ancestor of b'''
159 159
160 160 # check for working copy
161 161 w = None
162 162 if b.rev() is None:
163 163 w = b
164 164 b = w.p1()
165 165 if a == b:
166 166 # short-circuit to avoid issues with merge states
167 167 return _dirstatecopies(w)
168 168
169 169 # files might have to be traced back to the fctx parent of the last
170 170 # one-side-only changeset, but not further back than that
171 171 limit = _findlimit(a._repo, a.rev(), b.rev())
172 172 if limit is None:
173 173 limit = -1
174 174 am = a.manifest()
175 175
176 176 # find where new files came from
177 177 # we currently don't try to find where old files went, too expensive
178 178 # this means we can miss a case like 'hg rm b; hg cp a b'
179 179 cm = {}
180 180
181 181 # Computing the forward missing is quite expensive on large manifests, since
182 182 # it compares the entire manifests. We can optimize it in the common use
183 183 # case of computing what copies are in a commit versus its parent (like
184 184 # during a rebase or histedit). Note, we exclude merge commits from this
185 185 # optimization, since the ctx.files() for a merge commit is not correct for
186 186 # this comparison.
187 187 forwardmissingmatch = match
188 188 if not match and b.p1() == a and b.p2().node() == node.nullid:
189 189 forwardmissingmatch = scmutil.matchfiles(a._repo, b.files())
190 190 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
191 191
192 192 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
193 193 for f in missing:
194 194 fctx = b[f]
195 195 fctx._ancestrycontext = ancestrycontext
196 196 ofctx = _tracefile(fctx, am, limit)
197 197 if ofctx:
198 198 cm[f] = ofctx.path()
199 199
200 200 # combine copies from dirstate if necessary
201 201 if w is not None:
202 202 cm = _chain(a, w, cm, _dirstatecopies(w))
203 203
204 204 return cm
205 205
206 206 def _backwardrenames(a, b):
207 207 if a._repo.ui.configbool('experimental', 'disablecopytrace'):
208 208 return {}
209 209
210 210 # Even though we're not taking copies into account, 1:n rename situations
211 211 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
212 212 # arbitrarily pick one of the renames.
213 213 f = _forwardcopies(b, a)
214 214 r = {}
215 215 for k, v in sorted(f.iteritems()):
216 216 # remove copies
217 217 if v in a:
218 218 continue
219 219 r[v] = k
220 220 return r
221 221
222 222 def pathcopies(x, y, match=None):
223 223 '''find {dst@y: src@x} copy mapping for directed compare'''
224 224 if x == y or not x or not y:
225 225 return {}
226 226 a = y.ancestor(x)
227 227 if a == x:
228 228 return _forwardcopies(x, y, match=match)
229 229 if a == y:
230 230 return _backwardrenames(x, y)
231 231 return _chain(x, y, _backwardrenames(x, a),
232 232 _forwardcopies(a, y, match=match))
233 233
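A usage sketch for pathcopies, assuming a local repository at a hypothetical path and a rename made in the working directory; the ui construction may differ slightly between Mercurial versions:

from mercurial import copies, hg, ui as uimod

repo = hg.repository(uimod.ui(), '/path/to/repo')   # hypothetical path
# after "hg mv a.txt b.txt" this maps destination to source, e.g.
# {'b.txt': 'a.txt'}
print(copies.pathcopies(repo['.'], repo[None]))
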
234 234 def _computenonoverlap(repo, c1, c2, addedinm1, addedinm2):
235 235 """Computes, based on addedinm1 and addedinm2, the files exclusive to c1
236 236 and c2. This is its own function so extensions can easily wrap this call
237 237 to see what files mergecopies is about to process.
238 238
239 239 Even though c1 and c2 are not used in this function, they are useful in
240 240 other extensions for being able to read the file nodes of the changed files.
241 241 """
242 242 u1 = sorted(addedinm1 - addedinm2)
243 243 u2 = sorted(addedinm2 - addedinm1)
244 244
245 245 if u1:
246 246 repo.ui.debug(" unmatched files in local:\n %s\n"
247 247 % "\n ".join(u1))
248 248 if u2:
249 249 repo.ui.debug(" unmatched files in other:\n %s\n"
250 250 % "\n ".join(u2))
251 251 return u1, u2
252 252
253 253 def _makegetfctx(ctx):
254 254 """return a 'getfctx' function suitable for _checkcopies usage
255 255
256 256 We have to re-setup the function building 'filectx' for each
257 257 '_checkcopies' to ensure the linkrev adjustment is properly setup for
258 258 each. Linkrev adjustment is important to avoid bugs in rename
259 259 detection. Moreover, having a proper '_ancestrycontext' setup ensures
260 260 the performance impact of this adjustment is kept limited. Without it,
261 261 each file could do a full dag traversal making the time complexity of
262 262 the operation explode (see issue4537).
263 263
264 264 This function exists here mostly to limit the impact on stable. Feel
265 265 free to refactor on default.
266 266 """
267 267 rev = ctx.rev()
268 268 repo = ctx._repo
269 269 ac = getattr(ctx, '_ancestrycontext', None)
270 270 if ac is None:
271 271 revs = [rev]
272 272 if rev is None:
273 273 revs = [p.rev() for p in ctx.parents()]
274 274 ac = repo.changelog.ancestors(revs, inclusive=True)
275 275 ctx._ancestrycontext = ac
276 276 def makectx(f, n):
277 277 if len(n) != 20: # in a working context?
278 278 if ctx.rev() is None:
279 279 return ctx.filectx(f)
280 280 return repo[None][f]
281 281 fctx = repo.filectx(f, fileid=n)
282 282 # setup only needed for filectx not created from a changectx
283 283 fctx._ancestrycontext = ac
284 284 fctx._descendantrev = rev
285 285 return fctx
286 286 return util.lrucachefunc(makectx)
287 287
288 288 def mergecopies(repo, c1, c2, ca):
289 289 """
290 290 Find moves and copies between context c1 and c2 that are relevant
291 291 for merging.
292 292
293 293 Returns four dicts: "copy", "movewithdir", "diverge", and
294 294 "renamedelete".
295 295
296 296 "copy" is a mapping from destination name -> source name,
297 297 where source is in c1 and destination is in c2 or vice-versa.
298 298
299 299 "movewithdir" is a mapping from source name -> destination name,
300 300 where a file present at source in one context but not the other
301 301 needs to be moved to destination by the merge process, because the
302 302 other context moved the directory it is in.
303 303
304 304 "diverge" is a mapping of source name -> list of destination names
305 305 for divergent renames.
306 306
307 307 "renamedelete" is a mapping of source name -> list of destination
308 308 names for files deleted in c1 that were renamed in c2 or vice-versa.
309 309 """
310 310 # avoid silly behavior for update from empty dir
311 311 if not c1 or not c2 or c1 == c2:
312 312 return {}, {}, {}, {}
313 313
314 314 # avoid silly behavior for parent -> working dir
315 315 if c2.node() is None and c1.node() == repo.dirstate.p1():
316 316 return repo.dirstate.copies(), {}, {}, {}
317 317
318 318 # Copy trace disabling is explicitly below the node == p1 logic above
319 319 # because the logic above is required for a simple copy to be kept across a
320 320 # rebase.
321 321 if repo.ui.configbool('experimental', 'disablecopytrace'):
322 322 return {}, {}, {}, {}
323 323
324 324 limit = _findlimit(repo, c1.rev(), c2.rev())
325 325 if limit is None:
326 326 # no common ancestor, no copies
327 327 return {}, {}, {}, {}
328 328 repo.ui.debug(" searching for copies back to rev %d\n" % limit)
329 329
330 330 m1 = c1.manifest()
331 331 m2 = c2.manifest()
332 332 ma = ca.manifest()
333 333
334 334 # see _checkcopies documentation below for these dicts
335 335 copy1, copy2 = {}, {}
336 336 movewithdir1, movewithdir2 = {}, {}
337 337 fullcopy1, fullcopy2 = {}, {}
338 338 diverge = {}
339 339
340 340 # find interesting file sets from manifests
341 341 addedinm1 = m1.filesnotin(ma)
342 342 addedinm2 = m2.filesnotin(ma)
343 343 u1r, u2r = _computenonoverlap(repo, c1, c2, addedinm1, addedinm2)
344 344 u1u, u2u = u1r, u2r
345 345 bothnew = sorted(addedinm1 & addedinm2)
346 346
347 347 for f in u1u:
348 348 _checkcopies(c1, f, m1, m2, ca, limit, diverge, copy1, fullcopy1)
349 349
350 350 for f in u2u:
351 351 _checkcopies(c2, f, m2, m1, ca, limit, diverge, copy2, fullcopy2)
352 352
353 353 copy = dict(copy1.items() + copy2.items())
354 354 movewithdir = dict(movewithdir1.items() + movewithdir2.items())
355 355 fullcopy = dict(fullcopy1.items() + fullcopy2.items())
356 356
357 357 renamedelete = {}
358 358 renamedeleteset = set()
359 359 divergeset = set()
360 360 for of, fl in diverge.items():
361 361 if len(fl) == 1 or of in c1 or of in c2:
362 362 del diverge[of] # not actually divergent, or not a rename
363 363 if of not in c1 and of not in c2:
364 364 # renamed on one side, deleted on the other side, but filter
365 365 # out files that have been renamed and then deleted
366 366 renamedelete[of] = [f for f in fl if f in c1 or f in c2]
367 367 renamedeleteset.update(fl) # reverse map for below
368 368 else:
369 369 divergeset.update(fl) # reverse map for below
370 370
371 371 if bothnew:
372 372 repo.ui.debug(" unmatched files new in both:\n %s\n"
373 373 % "\n ".join(bothnew))
374 374 bothdiverge, _copy, _fullcopy = {}, {}, {}
375 375 for f in bothnew:
376 376 _checkcopies(c1, f, m1, m2, ca, limit, bothdiverge, _copy, _fullcopy)
377 377 _checkcopies(c2, f, m2, m1, ca, limit, bothdiverge, _copy, _fullcopy)
378 378 for of, fl in bothdiverge.items():
379 379 if len(fl) == 2 and fl[0] == fl[1]:
380 380 copy[fl[0]] = of # not actually divergent, just matching renames
381 381
382 382 if fullcopy and repo.ui.debugflag:
383 383 repo.ui.debug(" all copies found (* = to merge, ! = divergent, "
384 384 "% = renamed and deleted):\n")
385 385 for f in sorted(fullcopy):
386 386 note = ""
387 387 if f in copy:
388 388 note += "*"
389 389 if f in divergeset:
390 390 note += "!"
391 391 if f in renamedeleteset:
392 392 note += "%"
393 393 repo.ui.debug(" src: '%s' -> dst: '%s' %s\n" % (fullcopy[f], f,
394 394 note))
395 395 del divergeset
396 396
397 397 if not fullcopy:
398 398 return copy, movewithdir, diverge, renamedelete
399 399
400 400 repo.ui.debug(" checking for directory renames\n")
401 401
402 402 # generate a directory move map
403 403 d1, d2 = c1.dirs(), c2.dirs()
404 404 # Hack for adding '', which is not otherwise added, to d1 and d2
405 405 d1.addpath('/')
406 406 d2.addpath('/')
407 407 invalid = set()
408 408 dirmove = {}
409 409
410 410 # examine each file copy for a potential directory move, which is
411 411 # when all the files in a directory are moved to a new directory
412 412 for dst, src in fullcopy.iteritems():
413 413 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
414 414 if dsrc in invalid:
415 415 # already seen to be uninteresting
416 416 continue
417 417 elif dsrc in d1 and ddst in d1:
418 418 # directory wasn't entirely moved locally
419 419 invalid.add(dsrc + "/")
420 420 elif dsrc in d2 and ddst in d2:
421 421 # directory wasn't entirely moved remotely
422 422 invalid.add(dsrc + "/")
423 423 elif dsrc + "/" in dirmove and dirmove[dsrc + "/"] != ddst + "/":
424 424 # files from the same directory moved to two different places
425 425 invalid.add(dsrc + "/")
426 426 else:
427 427 # looks good so far
428 428 dirmove[dsrc + "/"] = ddst + "/"
429 429
430 430 for i in invalid:
431 431 if i in dirmove:
432 432 del dirmove[i]
433 433 del d1, d2, invalid
434 434
435 435 if not dirmove:
436 436 return copy, movewithdir, diverge, renamedelete
437 437
438 438 for d in dirmove:
439 439 repo.ui.debug(" discovered dir src: '%s' -> dst: '%s'\n" %
440 440 (d, dirmove[d]))
441 441
442 442 # check unaccounted nonoverlapping files against directory moves
443 443 for f in u1r + u2r:
444 444 if f not in fullcopy:
445 445 for d in dirmove:
446 446 if f.startswith(d):
447 447 # new file added in a directory that was moved, move it
448 448 df = dirmove[d] + f[len(d):]
449 449 if df not in copy:
450 450 movewithdir[f] = df
451 451 repo.ui.debug((" pending file src: '%s' -> "
452 452 "dst: '%s'\n") % (f, df))
453 453 break
454 454
455 455 return copy, movewithdir, diverge, renamedelete
456 456
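A sketch of how a caller might consume the four dicts returned by mergecopies; the repository path and the 'somebranch' head are hypothetical, and the merge base is computed the usual way via changectx.ancestor():

from mercurial import copies, hg, ui as uimod

repo = hg.repository(uimod.ui(), '/path/to/repo')   # hypothetical path
wc, other = repo[None], repo['somebranch']          # hypothetical other head
ancestor = wc.ancestor(other)
copy, movewithdir, diverge, renamedelete = copies.mergecopies(
    repo, wc, other, ancestor)
for dst, src in sorted(copy.iteritems()):
    repo.ui.note("%s was copied or renamed from %s\n" % (dst, src))
for of, fl in sorted(diverge.iteritems()):
    repo.ui.note("%s diverged into %s\n" % (of, ", ".join(fl)))
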
457 457 def _checkcopies(ctx, f, m1, m2, base, limit, diverge, copy, fullcopy):
458 458 """
459 459 check possible copies of f from m1 to m2
460 460
461 461 ctx = starting context for f in m1
462 462 f = the filename to check (as in m1)
463 463 m1 = the source manifest
464 464 m2 = the destination manifest
465 465 base = the changectx used as a merge base
466 466 limit = the rev number to not search beyond
467 467 diverge = record all diverges in this dict
468 468 copy = record all non-divergent copies in this dict
469 469 fullcopy = record all copies in this dict
470 470
471 471 note: limit is only an optimization, and there is no guarantee that
472 472 irrelevant revisions will not be visited;
473 473 there is no easy way to make this algorithm stop in a guaranteed way
474 474 once it "goes behind a certain revision".
475 475 """
476 476
477 477 mb = base.manifest()
478 478 getfctx = _makegetfctx(ctx)
479 479
480 480 def _related(f1, f2, limit):
481 481 # Walk back to common ancestor to see if the two files originate
482 482 # from the same file. Since workingfilectx's rev() is None it messes
483 483 # up the integer comparison logic, hence the pre-step check for
484 484 # None (f1 and f2 can only be workingfilectx's initially).
485 485
486 486 if f1 == f2:
487 487 return f1 # a match
488 488
489 489 g1, g2 = f1.ancestors(), f2.ancestors()
490 490 try:
491 491 f1r, f2r = f1.linkrev(), f2.linkrev()
492 492
493 493 if f1r is None:
494 494 f1 = next(g1)
495 495 if f2r is None:
496 496 f2 = next(g2)
497 497
498 498 while True:
499 499 f1r, f2r = f1.linkrev(), f2.linkrev()
500 500 if f1r > f2r:
501 501 f1 = next(g1)
502 502 elif f2r > f1r:
503 503 f2 = next(g2)
504 504 elif f1 == f2:
505 505 return f1 # a match
506 506 elif f1r == f2r or f1r < limit or f2r < limit:
507 507 return False # copy no longer relevant
508 508 except StopIteration:
509 509 return False
510 510
511 511 of = None
512 512 seen = set([f])
513 513 for oc in getfctx(f, m1[f]).ancestors():
514 514 ocr = oc.linkrev()
515 515 of = oc.path()
516 516 if of in seen:
517 517 # check limit late - grab last rename before
518 518 if ocr < limit:
519 519 break
520 520 continue
521 521 seen.add(of)
522 522
523 523 fullcopy[f] = of # remember for dir rename detection
524 524 if of not in m2:
525 525 continue # no match, keep looking
526 526 if m2[of] == mb.get(of):
527 527 return # no merge needed, quit early
528 528 c2 = getfctx(of, m2[of])
529 # c2 might be a plain new file added on the destination side that is
530 # unrelated to the droids we are looking for.
529 531 cr = _related(oc, c2, base.rev())
530 532 if cr and (of == f or of == c2.path()): # non-divergent
531 533 copy[f] = of
532 534 return
533 535
534 536 if of in mb:
535 537 diverge.setdefault(of, []).append(f)
536 538
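The '_related' helper above advances whichever of the two ancestor chains currently has the larger linkrev until the chains meet, giving up when the walk runs out of ancestors or stops being relevant. The self-contained sketch below is an editor's illustration of that two-pointer walk, using made-up (linkrev, node) pairs in place of filectx ancestors:

def _related_sketch(chain1, chain2, limit):
    # chain1/chain2: ancestors as (linkrev, node) pairs, newest first
    g1, g2 = iter(chain1), iter(chain2)
    try:
        f1, f2 = next(g1), next(g2)
        while True:
            f1r, f2r = f1[0], f2[0]
            if f1r > f2r:
                f1 = next(g1)              # walk the younger chain back
            elif f2r > f1r:
                f2 = next(g2)
            elif f1 == f2:
                return f1                  # chains met: the files are related
            elif f1r == f2r or f1r < limit or f2r < limit:
                return False               # copy no longer relevant
    except StopIteration:
        return False                       # ran out of ancestors

# both histories converge on the common ancestor (2, 'n2')
assert _related_sketch([(7, 'x3'), (5, 'x2'), (2, 'n2')],
                       [(6, 'y2'), (2, 'n2')], 0) == (2, 'n2')
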
537 539 def duplicatecopies(repo, rev, fromrev, skiprev=None):
538 540 '''reproduce copies from fromrev to rev in the dirstate
539 541
540 542 If skiprev is specified, it's a revision that should be used to
541 543 filter copy records. Any copies that occur between fromrev and
542 544 skiprev will not be duplicated, even if they appear in the set of
543 545 copies between fromrev and rev.
544 546 '''
545 547 exclude = {}
546 548 if (skiprev is not None and
547 549 not repo.ui.configbool('experimental', 'disablecopytrace')):
548 550 # disablecopytrace skips this line, but not the entire function because
549 551 # the line below is O(size of the repo) during a rebase, while the rest
550 552 # of the function is much faster (and is required for carrying copy
551 553 # metadata across the rebase anyway).
552 554 exclude = pathcopies(repo[fromrev], repo[skiprev])
553 555 for dst, src in pathcopies(repo[fromrev], repo[rev]).iteritems():
554 556 # copies.pathcopies returns backward renames, so dst might not
555 557 # actually be in the dirstate
556 558 if dst in exclude:
557 559 continue
558 560 if repo.dirstate[dst] in "nma":
559 561 repo.dirstate.copy(src, dst)
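
Finally, a sketch of how rebase-like code might invoke duplicatecopies to carry copy metadata into the dirstate; the repository path and revision numbers are hypothetical, and the write lock is taken because the dirstate is modified:

from mercurial import copies, hg, ui as uimod

repo = hg.repository(uimod.ui(), '/path/to/repo')   # hypothetical path
wlock = repo.wlock()
try:
    # replay into the dirstate the copies made between rev 10 and rev 12,
    # skipping any that already happened between rev 10 and rev 11
    copies.duplicatecopies(repo, 12, 10, skiprev=11)
finally:
    wlock.release()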