# copies.py - copy detection for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import collections
import os

from .i18n import _


from . import (
    match as matchmod,
    node,
    pathutil,
    policy,
    pycompat,
    util,
)


from .utils import stringutil

from .revlogutils import flagutil

rustmod = policy.importrust("copy_tracing")


def _filter(src, dst, t):
    """filters out invalid copies after chaining"""

    # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
    # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
    # in the following table (not including trivial cases). For example, case 2
    # is where a file existed in 'src' and remained under that name in 'mid' and
    # then was renamed between 'mid' and 'dst'.
    #
    # case src mid dst result
    #   1   x   y   -    -
    #   2   x   y   y   x->y
    #   3   x   y   x    -
    #   4   x   y   z   x->z
    #   5   -   x   y    -
    #   6   x   x   y   x->y
    #
    # _chain() takes care of chaining the copies in 'a' and 'b', but it
    # cannot tell the difference between cases 1 and 2, between 3 and 4, or
    # between 5 and 6, so it includes all cases in its result.
    # Cases 1, 3, and 5 are then removed by _filter().

    for k, v in list(t.items()):
        # remove copies from files that didn't exist
        if v not in src:
            del t[k]
        # remove criss-crossed copies
        elif k in src and v in dst:
            del t[k]
        # remove copies to files that were then removed
        elif k not in dst:
            del t[k]

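# A minimal sketch of the filtering rules above (hypothetical values; plain
# sets stand in for the 'src' and 'dst' contexts, which only need to support
# membership tests here). Shown doctest-style rather than executed at import:
#
#   >>> t = {b'y': b'x', b'q': b'z'}
#   >>> _filter({b'x'}, {b'y', b'q'}, t)
#   >>> t                      # b'q' dropped: its source b'z' is not in src
#   {b'y': b'x'}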

def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    result = prefix.copy()
    for key, value in pycompat.iteritems(suffix):
        result[key] = prefix.get(value, value)
    return result

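# A minimal illustration of the chaining rule (hypothetical values, shown
# doctest-style): chaining a->b with b->c yields both b and c mapped back
# to a.
#
#   >>> _chain({b'b': b'a'}, {b'c': b'b'})
#   {b'b': b'a', b'c': b'a'}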

def _tracefile(fctx, am, basemf):
    """return the path of the ancestor of fctx that is present in the
    ancestor manifest am

    Note: we used to try and stop after a given limit, however checking if that
    limit is reached turned out to be very expensive. We are better off
    disabling that feature."""

    for f in fctx.ancestors():
        path = f.path()
        if am.get(path, None) == f.filenode():
            return path
        if basemf and basemf.get(path, None) == f.filenode():
            return path


def _dirstatecopies(repo, match=None):
    ds = repo.dirstate
    c = ds.copies().copy()
    for k in list(c):
        if ds[k] not in b'anm' or (match and not match(k)):
            del c[k]
    return c


def _computeforwardmissing(a, b, match=None):
    """Computes which files are in b but not a.
    This is its own function so extensions can easily wrap this call to see what
    files _forwardcopies is about to process.
    """
    ma = a.manifest()
    mb = b.manifest()
    return mb.filesnotin(ma, match=match)


def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    changesetsource = (b'changeset-only', b'compatibility')
    return readfrom in changesetsource

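# For illustration, the changeset-centric algorithms are selected when the
# repository stores copies in changeset sidedata, or with a configuration
# such as the following (either value from `changesetsource` above works):
#
#   [experimental]
#   copies.read-from = compatibility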

def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
            if debug:
                dbg(
                    b'debug.copies: time: %f seconds\n'
                    % (util.timer() - start)
                )
    return cm


def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask for information about a
    # revision from one parent, we'll later ask for it from the other. So it
    # makes sense to keep the information around when reaching the first
    # parent of a merge and to drop it after it has been provided for the
    # second parent.
    #
    # There are cases where only one parent of a merge will be walked. This
    # happens when the "destination" of the copy tracing is a descendant of a
    # new root that is not common with the "source". In that case, we will
    # only walk through merge parents that are descendants of changesets
    # common between "source" and "destination".
    #
    # With the current implementation, if such changesets carry copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect this case to be frequent
    # enough to matter.
    #
    # In addition, it is possible to reach a pathological case where many
    # first parents are met before any second parent is reached. In that case
    # the cache could grow. If this ever becomes an issue, one can safely
    # introduce a maximum cache size. This would trade extra CPU/IO time to
    # save memory.
    merge_caches = {}

    def revinfo(rev):
        p1, p2 = parents(rev)
        value = None
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX in some cases we over-cache; ignored for now
            merge_caches[rev] = value
        return value

    return revinfo


def cached_is_ancestor(is_ancestor):
    """return a cached version of is_ancestor"""
    cache = {}

    def _is_ancestor(anc, desc):
        if anc > desc:
            return False
        elif anc == desc:
            return True
        key = (anc, desc)
        ret = cache.get(key)
        if ret is None:
            ret = cache[key] = is_ancestor(anc, desc)
        return ret

    return _is_ancestor

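# A minimal usage sketch (hypothetical stub, shown doctest-style). Revisions
# are plain integers here; an ancestor always has a smaller revision number
# than its descendant, so anc > desc short-circuits without caching:
#
#   >>> calls = []
#   >>> f = cached_is_ancestor(lambda a, d: calls.append((a, d)) or True)
#   >>> f(1, 5), f(1, 5), f(5, 1), f(3, 3)
#   (True, True, False, True)
#   >>> calls                  # the wrapped callable was hit only once
#   [(1, 5)]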

def _changesetforwardcopies(a, b, match):
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )


def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
306 306 """combine the copies information for each item of iterrevs
307 307
308 308 revs: sorted iterable of revision to visit
309 309 children: a {parent: [children]} mapping.
310 310 targetrev: the final copies destination revision (not in iterrevs)
311 311 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
312 312 match: a matcher
313 313
314 314 It returns the aggregated copies information for `targetrev`.
315 315 """

    alwaysmatch = match.always()

    if rustmod is not None and alwaysmatch:
        return rustmod.combine_changeset_copies(
            list(revs), children, targetrev, revinfo, isancestor
        )

    isancestor = cached_is_ancestor(isancestor)

    all_copies = {}
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            elif newcopies is othercopies:
                # nothing to merge:
                pass
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies

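# A small data-flow sketch for the combination loop above (hypothetical
# revision numbers; `changes` objects would normally come from revinfo). On a
# linear history 0 -> 1 -> 2 with targetrev = 2:
#
#   revs = [0, 1]
#   children = {0: [1], 1: [2]}
#
# Visiting rev 0 seeds the pending copies for rev 1; visiting rev 1 extends
# them for rev 2. Each pending entry maps dest -> (rev_where_copied, source),
# and the final loop flattens the entries for targetrev to dest -> source,
# dropping entries whose source is None (files removed along the way).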

def _merge_copies_dict(minor, major, isancestor, changes):
    """merge two copy mappings together, minor and major

    In case of conflict, the value from "major" will be picked.

    - `isancestor(low_rev, high_rev)`: a callable that returns True if
      `low_rev` is an ancestor of `high_rev`,

    - `changes`: a ChangingFiles object describing the changes made in the
      current revision (or None when that information is unavailable).
    """
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if new_tt == other_tt:
                minor[dest] = value
            elif (
                changes is not None
                and value[1] is None
                and dest in changes.salvaged
            ):
                pass
            elif (
                changes is not None
                and other[1] is None
                and dest in changes.salvaged
            ):
                minor[dest] = value
            elif changes is not None and dest in changes.merged:
                minor[dest] = value
            elif not isancestor(new_tt, other_tt):
                if value[1] is not None:
                    minor[dest] = value
            elif isancestor(other_tt, new_tt):
                minor[dest] = value

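# A minimal sketch of the precedence rule (hypothetical revision numbers,
# changes=None, shown doctest-style): when both sides recorded the copy at
# the same revision, the "major" value simply wins.
#
#   >>> minor = {b'f': (2, b'old')}
#   >>> _merge_copies_dict(minor, {b'f': (2, b'new')}, lambda a, d: a < d, None)
#   >>> minor
#   {b'f': (2, b'new')}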

def _revinfo_getter_extra(repo):
    """return a function that returns multiple data given a <rev>:

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if a file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo


def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information"""
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 takes precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regard to
                # potential filelog-related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies


def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
    """version of `_merge_copies_dict` that works with the Google
    specific "extra" based storage for copy information"""
    for dest, value in major.items():
        other = minor.get(dest)
        if other is None:
            minor[dest] = value
        else:
            new_tt = value[0]
            other_tt = other[0]
            if value[1] == other[1]:
                continue
            # content from "major" wins, unless it is older
            # than the branch point or there is a merge
            if (
                new_tt == other_tt
                or not isancestor(new_tt, other_tt)
                or ismerged(dest)
            ):
                minor[dest] = value


def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    # check for working copy
    if b.rev() is None:
        cm = _committedforwardcopies(a, b.p1(), base, match)
        # combine copies from dirstate if necessary
        copies = _chain(cm, _dirstatecopies(b._repo, match))
    else:
        copies = _committedforwardcopies(a, b, base, match)
    return copies


def _backwardrenames(a, b, match):
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    f = _forwardcopies(b, a)
    r = {}
    for k, v in sorted(pycompat.iteritems(f)):
        if match and not match(v):
            continue
        # remove copies
        if v in a:
            continue
        r[v] = k
    return r

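# A minimal sketch of the inversion performed above (hypothetical names): if
# _forwardcopies(b, a) reports {b'renamed': b'original'}, the loop emits
# {b'original': b'renamed'}, except that entries whose source still exists in
# `a` are dropped, since those are copies rather than renames.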

def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare"""
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies


def mergecopies(repo, c1, c2, base):
647 650 """
648 651 Finds moves and copies between context c1 and c2 that are relevant for
649 652 merging. 'base' will be used as the merge base.
650 653
651 654 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
652 655 files that were moved/ copied in one merge parent and modified in another.
653 656 For example:
654 657
655 658 o ---> 4 another commit
656 659 |
657 660 | o ---> 3 commit that modifies a.txt
658 661 | /
659 662 o / ---> 2 commit that moves a.txt to b.txt
660 663 |/
661 664 o ---> 1 merge base
662 665
663 666 If we try to rebase revision 3 on revision 4, since there is no a.txt in
664 667 revision 4, and if user have copytrace disabled, we prints the following
665 668 message:
666 669
667 670 ```other changed <file> which local deleted```
668 671
669 672 Returns a tuple where:
670 673
671 674 "branch_copies" an instance of branch_copies.
672 675
673 676 "diverge" is a mapping of source name -> list of destination names
674 677 for divergent renames.
675 678
676 679 This function calls different copytracing algorithms based on config.
677 680 """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so copytracing stays enabled in such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)


def _isfullcopytraceable(repo, c1, base):
717 720 """Checks that if base, source and destination are all no-public branches,
718 721 if yes let's use the full copytrace algorithm for increased capabilities
719 722 since it will be fast enough.
720 723
721 724 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
722 725 number of changesets from c1 to base such that if number of changesets are
723 726 more than the limit, full copytracing algorithm won't be used.
724 727 """
    if c1.rev() is None:
        c1 = c1.p1()
    if c1.mutable() and base.mutable():
        sourcecommitlimit = repo.ui.configint(
            b'experimental', b'copytrace.sourcecommitlimit'
        )
        commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
        return commits < sourcecommitlimit
    return False


def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src


class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        self.copy = {} if copy is None else copy
        self.renamedelete = {} if renamedelete is None else renamedelete
        self.dirmove = {} if dirmove is None else dirmove
        self.movewithdir = {} if movewithdir is None else movewithdir

    def __repr__(self):
        return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
            self.copy,
            self.renamedelete,
            self.dirmove,
            self.movewithdir,
        )

792 795
793 796 def _fullcopytracing(repo, c1, c2, base):
794 797 """The full copytracing algorithm which finds all the new files that were
795 798 added from merge base up to the top commit and for each file it checks if
796 799 this file was copied from another file.
797 800
798 801 This is pretty slow when a lot of changesets are involved but will track all
799 802 the copies.
800 803 """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a', the common destination 'c' makes
                # this a non-divergent rename.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
            # TODO: Handle cases where it was renamed on one side and copied
            # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b"  unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'local', b"\n   ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n   %s\n" % (header % b'other', b"\n   ".join(u2)))

    if repo.ui.debugflag:
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b"  all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b"  on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b"   src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b"  checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge


def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b"   discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b"   pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir

982 985
983 986 def _heuristicscopytracing(repo, c1, c2, base):
984 987 """Fast copytracing using filename heuristics
985 988
986 989 Assumes that moves or renames are of following two types:
987 990
988 991 1) Inside a directory only (same directory name but different filenames)
989 992 2) Move from one directory to another
990 993 (same filenames but different directory names)
991 994
992 995 Works only when there are no merge commits in the "source branch".
993 996 Source branch is commits from base up to c2 not including base.
994 997
995 998 If merge is involved it fallbacks to _fullcopytracing().
996 999
997 1000 Can be used by setting the following config:
998 1001
999 1002 [experimental]
1000 1003 copytrace = heuristics
1001 1004
1002 1005 In some cases the copy/move candidates found by heuristics can be very large
1003 1006 in number and that will make the algorithm slow. The number of possible
1004 1007 candidates to check can be limited by using the config
1005 1008 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1006 1009 """
1007 1010
1008 1011 if c1.rev() is None:
1009 1012 c1 = c1.p1()
1010 1013 if c2.rev() is None:
1011 1014 c2 = c2.p1()
1012 1015
1013 1016 changedfiles = set()
1014 1017 m1 = c1.manifest()
1015 1018 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1016 1019 # If base is not in c2 branch, we switch to fullcopytracing
1017 1020 repo.ui.debug(
1018 1021 b"switching to full copytracing as base is not "
1019 1022 b"an ancestor of c2\n"
1020 1023 )
1021 1024 return _fullcopytracing(repo, c1, c2, base)
1022 1025
1023 1026 ctx = c2
1024 1027 while ctx != base:
1025 1028 if len(ctx.parents()) == 2:
1026 1029 # To keep things simple let's not handle merges
1027 1030 repo.ui.debug(b"switching to full copytracing because of merges\n")
1028 1031 return _fullcopytracing(repo, c1, c2, base)
1029 1032 changedfiles.update(ctx.files())
1030 1033 ctx = ctx.p1()
1031 1034
1032 1035 copies2 = {}
1033 1036 cp = _forwardcopies(base, c2)
1034 1037 for dst, src in pycompat.iteritems(cp):
1035 1038 if src in m1:
1036 1039 copies2[dst] = src
1037 1040
1038 1041 # file is missing if it isn't present in the destination, but is present in
1039 1042 # the base and present in the source.
1040 1043 # Presence in the base is important to exclude added files, presence in the
1041 1044 # source is important to exclude removed files.
1042 1045 filt = lambda f: f not in m1 and f in base and f in c2
1043 1046 missingfiles = [f for f in changedfiles if filt(f)]
1044 1047
1045 1048 copies1 = {}
1046 1049 if missingfiles:
1047 1050 basenametofilename = collections.defaultdict(list)
1048 1051 dirnametofilename = collections.defaultdict(list)
1049 1052
1050 1053 for f in m1.filesnotin(base.manifest()):
1051 1054 basename = os.path.basename(f)
1052 1055 dirname = os.path.dirname(f)
1053 1056 basenametofilename[basename].append(f)
1054 1057 dirnametofilename[dirname].append(f)
1055 1058
1056 1059 for f in missingfiles:
1057 1060 basename = os.path.basename(f)
1058 1061 dirname = os.path.dirname(f)
1059 1062 samebasename = basenametofilename[basename]
1060 1063 samedirname = dirnametofilename[dirname]
1061 1064 movecandidates = samebasename + samedirname
1062 1065 # f is guaranteed to be present in c2, that's why
1063 1066 # c2.filectx(f) won't fail
1064 1067 f2 = c2.filectx(f)
1065 1068 # we can have a lot of candidates which can slow down the heuristics
1066 1069 # config value to limit the number of candidates moves to check
1067 1070 maxcandidates = repo.ui.configint(
1068 1071 b'experimental', b'copytrace.movecandidateslimit'
1069 1072 )
1070 1073
1071 1074 if len(movecandidates) > maxcandidates:
1072 1075 repo.ui.status(
1073 1076 _(
1074 1077 b"skipping copytracing for '%s', more "
1075 1078 b"candidates than the limit: %d\n"
1076 1079 )
1077 1080 % (f, len(movecandidates))
1078 1081 )
1079 1082 continue
1080 1083
1081 1084 for candidate in movecandidates:
1082 1085 f1 = c1.filectx(candidate)
1083 1086 if _related(f1, f2):
1084 1087 # if there are a few related copies then we'll merge
1085 1088 # changes into all of them. This matches the behaviour
1086 1089 # of upstream copytracing
1087 1090 copies1[candidate] = f
1088 1091
1089 1092 return branch_copies(copies1), branch_copies(copies2), {}
1090 1093
1091 1094
1092 1095 def _related(f1, f2):
1093 1096 """return True if f1 and f2 filectx have a common ancestor
1094 1097
1095 1098 Walk back to common ancestor to see if the two files originate
1096 1099 from the same file. Since workingfilectx's rev() is None it messes
1097 1100 up the integer comparison logic, hence the pre-step check for
1098 1101 None (f1 and f2 can only be workingfilectx's initially).
1099 1102 """
1100 1103
1101 1104 if f1 == f2:
1102 1105 return True # a match
1103 1106
1104 1107 g1, g2 = f1.ancestors(), f2.ancestors()
1105 1108 try:
1106 1109 f1r, f2r = f1.linkrev(), f2.linkrev()
1107 1110
1108 1111 if f1r is None:
1109 1112 f1 = next(g1)
1110 1113 if f2r is None:
1111 1114 f2 = next(g2)
1112 1115
1113 1116 while True:
1114 1117 f1r, f2r = f1.linkrev(), f2.linkrev()
1115 1118 if f1r > f2r:
1116 1119 f1 = next(g1)
1117 1120 elif f2r > f1r:
1118 1121 f2 = next(g2)
1119 1122 else: # f1 and f2 point to files in the same linkrev
1120 1123 return f1 == f2 # true if they point to the same file
1121 1124 except StopIteration:
1122 1125 return False
1123 1126
1124 1127
1125 1128 def graftcopies(wctx, ctx, base):
1126 1129 """reproduce copies between base and ctx in the wctx
1127 1130
1128 1131 Unlike mergecopies(), this function will only consider copies between base
1129 1132 and ctx; it will ignore copies between base and wctx. Also unlike
1130 1133 mergecopies(), this function will apply copies to the working copy (instead
1131 1134 of just returning information about the copies). That makes it cheaper
1132 1135 (especially in the common case of base==ctx.p1()) and useful also when
1133 1136 experimental.copytrace=off.
1134 1137
1135 1138 merge.update() will have already marked most copies, but it will only
1136 1139 mark copies if it thinks the source files are related (see
1137 1140 merge._related()). It will also not mark copies if the file wasn't modified
1138 1141 on the local side. This function adds the copies that were "missed"
1139 1142 by merge.update().
1140 1143 """
1141 1144 new_copies = pathcopies(base, ctx)
1142 1145 _filter(wctx.p1(), wctx, new_copies)
1143 1146 for dst, src in pycompat.iteritems(new_copies):
1144 1147 wctx[dst].markcopied(src)