copies: use the rust code for `combine_changeset_copies`...
marmoute
r46576:a66568f2 default
@@ -1,1132 +1,1142 @@
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 policy,
20 21 pycompat,
21 22 util,
22 23 )
23 24
24 25
25 26 from .utils import stringutil
26 27
27 28 from .revlogutils import flagutil
28 29
30 rustmod = policy.importrust("copy_tracing")
31
29 32
30 33 def _filter(src, dst, t):
31 34 """filters out invalid copies after chaining"""
32 35
33 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 38 # in the following table (not including trivial cases). For example, case 2
36 39 # is where a file existed in 'src' and remained under that name in 'mid' and
37 40 # then was renamed between 'mid' and 'dst'.
38 41 #
39 42 # case src mid dst result
40 43 # 1 x y - -
41 44 # 2 x y y x->y
42 45 # 3 x y x -
43 46 # 4 x y z x->z
44 47 # 5 - x y -
45 48 # 6 x x y x->y
46 49 #
47 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 52 # between 5 and 6, so it includes all cases in its result.
50 53 # Cases 1, 3, and 5 are then removed by _filter().
51 54
52 55 for k, v in list(t.items()):
53 56 # remove copies from files that didn't exist
54 57 if v not in src:
55 58 del t[k]
56 59 # remove criss-crossed copies
57 60 elif k in src and v in dst:
58 61 del t[k]
59 62 # remove copies to files that were then removed
60 63 elif k not in dst:
61 64 del t[k]
62 65
63 66
64 67 def _chain(prefix, suffix):
65 68 """chain two sets of copies 'prefix' and 'suffix'"""
66 69 result = prefix.copy()
67 70 for key, value in pycompat.iteritems(suffix):
68 71 result[key] = prefix.get(value, value)
69 72 return result
70 73
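To make the chaining rule concrete, here is a minimal standalone sketch (plain dicts rather than Mercurial contexts; `chain` mirrors `_chain` above). Copy mappings are {dst: src}, so chaining a copy of a copy resolves back to the original source:

    def chain(prefix, suffix):
        result = prefix.copy()
        for key, value in suffix.items():
            result[key] = prefix.get(value, value)
        return result

    prefix = {b'x': b'a'}  # in src..mid, x was copied from a
    suffix = {b'y': b'x'}  # in mid..dst, y was copied from x
    assert chain(prefix, suffix) == {b'x': b'a', b'y': b'a'}

Cases 1, 3 and 5 from the table above survive this chaining and are only removed afterwards by _filter().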
71 74
72 75 def _tracefile(fctx, am, basemf):
73 76 """return file context that is the ancestor of fctx present in ancestor
74 77 manifest am
75 78
76 79 Note: we used to try to stop after a given limit; however, checking if that
77 80 limit is reached turned out to be very expensive, so we are better off
78 81 disabling that feature."""
79 82
80 83 for f in fctx.ancestors():
81 84 path = f.path()
82 85 if am.get(path, None) == f.filenode():
83 86 return path
84 87 if basemf and basemf.get(path, None) == f.filenode():
85 88 return path
86 89
87 90
88 91 def _dirstatecopies(repo, match=None):
89 92 ds = repo.dirstate
90 93 c = ds.copies().copy()
91 94 for k in list(c):
92 95 if ds[k] not in b'anm' or (match and not match(k)):
93 96 del c[k]
94 97 return c
95 98
96 99
97 100 def _computeforwardmissing(a, b, match=None):
98 101 """Computes which files are in b but not a.
99 102 This is its own function so extensions can easily wrap this call to see what
100 103 files _forwardcopies is about to process.
101 104 """
102 105 ma = a.manifest()
103 106 mb = b.manifest()
104 107 return mb.filesnotin(ma, match=match)
105 108
106 109
107 110 def usechangesetcentricalgo(repo):
108 111 """Checks if we should use changeset-centric copy algorithms"""
109 112 if repo.filecopiesmode == b'changeset-sidedata':
110 113 return True
111 114 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
112 115 changesetsource = (b'changeset-only', b'compatibility')
113 116 return readfrom in changesetsource
114 117
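For reference, the changeset-centric reading checked here maps to ordinary configuration; one of the values this function accepts:

    [experimental]
    copies.read-from = changeset-only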
115 118
116 119 def _committedforwardcopies(a, b, base, match):
117 120 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
118 121 # files might have to be traced back to the fctx parent of the last
119 122 # one-side-only changeset, but not further back than that
120 123 repo = a._repo
121 124
122 125 if usechangesetcentricalgo(repo):
123 126 return _changesetforwardcopies(a, b, match)
124 127
125 128 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
126 129 dbg = repo.ui.debug
127 130 if debug:
128 131 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
129 132 am = a.manifest()
130 133 basemf = None if base is None else base.manifest()
131 134
132 135 # find where new files came from
133 136 # we currently don't try to find where old files went, too expensive
134 137 # this means we can miss a case like 'hg rm b; hg cp a b'
135 138 cm = {}
136 139
137 140 # Computing the forward missing is quite expensive on large manifests, since
138 141 # it compares the entire manifests. We can optimize it in the common use
139 142 # case of computing what copies are in a commit versus its parent (like
140 143 # during a rebase or histedit). Note, we exclude merge commits from this
141 144 # optimization, since the ctx.files() for a merge commit is not correct for
142 145 # this comparison.
143 146 forwardmissingmatch = match
144 147 if b.p1() == a and b.p2().node() == node.nullid:
145 148 filesmatcher = matchmod.exact(b.files())
146 149 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
147 150 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
148 151
149 152 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
150 153
151 154 if debug:
152 155 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
153 156
154 157 for f in sorted(missing):
155 158 if debug:
156 159 dbg(b'debug.copies: tracing file: %s\n' % f)
157 160 fctx = b[f]
158 161 fctx._ancestrycontext = ancestrycontext
159 162
160 163 if debug:
161 164 start = util.timer()
162 165 opath = _tracefile(fctx, am, basemf)
163 166 if opath:
164 167 if debug:
165 168 dbg(b'debug.copies: rename of: %s\n' % opath)
166 169 cm[f] = opath
167 170 if debug:
168 171 dbg(
169 172 b'debug.copies: time: %f seconds\n'
170 173 % (util.timer() - start)
171 174 )
172 175 return cm
173 176
174 177
175 178 def _revinfo_getter(repo):
176 179 """returns a function that returns the following data given a <rev>"
177 180
178 181 * p1: revision number of first parent
179 182 * p2: revision number of second parent
180 183 * changes: a ChangingFiles object
181 184 """
182 185 cl = repo.changelog
183 186 parents = cl.parentrevs
184 187 flags = cl.flags
185 188
186 189 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
187 190
188 191 changelogrevision = cl.changelogrevision
189 192
190 193 # A small cache to avoid doing the work twice for merges
191 194 #
192 195 # In the vast majority of cases, if we ask information for a revision
193 196 # about 1 parent, we'll later ask it for the other. So it makes sense to
194 197 # keep the information around when reaching the first parent of a merge
195 198 # and to drop it once it has been provided for the second parent.
196 199 #
197 200 # There exist cases where only one parent of the merge will be walked. This
198 201 # happens when the "destination" of the copy tracing is a descendant of a
199 202 # new root not shared with the "source". In that case, we will only walk
200 203 # through merge parents that are descendants of changesets common
201 204 # between "source" and "destination".
202 205 #
203 206 # With the current implementation, if such changesets carry copy
204 207 # information, we'll keep them in memory until the end of
205 208 # _changesetforwardcopies. We don't expect the case to be frequent
206 209 # enough to matter.
207 210 #
208 211 # In addition, it is possible to reach a pathological case where
209 212 # many first parents are met before any second parent is reached. In
210 213 # that case the cache could grow. If this ever becomes an issue, one can
211 214 # safely introduce a maximum cache size. This would trade extra CPU/IO
212 215 # time to save memory.
213 216 merge_caches = {}
214 217
215 218 def revinfo(rev):
216 219 p1, p2 = parents(rev)
217 220 value = None
218 221 e = merge_caches.pop(rev, None)
219 222 if e is not None:
220 223 return e
221 224 changes = None
222 225 if flags(rev) & HASCOPIESINFO:
223 226 changes = changelogrevision(rev).changes
224 227 value = (p1, p2, changes)
225 228 if p1 != node.nullrev and p2 != node.nullrev:
226 229 # XXX in some cases we over-cache; IGNORE
227 230 merge_caches[rev] = value
228 231 return value
229 232
230 233 return revinfo
231 234
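The pop-based caching described in the comment above can be sketched in isolation (the `compute` and `is_merge` helpers are hypothetical stand-ins, not functions from this module):

    cache = {}

    def cached_revinfo(rev, compute, is_merge):
        hit = cache.pop(rev, None)  # a second lookup consumes the entry
        if hit is not None:
            return hit
        value = compute(rev)
        if is_merge(rev):  # only merges are expected to be asked about twice
            cache[rev] = value
        return value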
232 235
233 236 def cached_is_ancestor(is_ancestor):
234 237 """return a cached version of is_ancestor"""
235 238 cache = {}
236 239
237 240 def _is_ancestor(anc, desc):
238 241 if anc > desc:
239 242 return False
240 243 elif anc == desc:
241 244 return True
242 245 key = (anc, desc)
243 246 ret = cache.get(key)
244 247 if ret is None:
245 248 ret = cache[key] = is_ancestor(anc, desc)
246 249 return ret
247 250
248 251 return _is_ancestor
249 252
250 253
251 254 def _changesetforwardcopies(a, b, match):
252 255 if a.rev() in (node.nullrev, b.rev()):
253 256 return {}
254 257
255 258 repo = a.repo().unfiltered()
256 259 children = {}
257 260
258 261 cl = repo.changelog
259 262 isancestor = cached_is_ancestor(cl.isancestorrev)
260 263 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
261 264 mrset = set(missingrevs)
262 265 roots = set()
263 266 for r in missingrevs:
264 267 for p in cl.parentrevs(r):
265 268 if p == node.nullrev:
266 269 continue
267 270 if p not in children:
268 271 children[p] = [r]
269 272 else:
270 273 children[p].append(r)
271 274 if p not in mrset:
272 275 roots.add(p)
273 276 if not roots:
274 277 # no common revision to track copies from
275 278 return {}
276 279 min_root = min(roots)
277 280
278 281 from_head = set(
279 282 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
280 283 )
281 284
282 285 iterrevs = set(from_head)
283 286 iterrevs &= mrset
284 287 iterrevs.update(roots)
285 288 iterrevs.remove(b.rev())
286 289 revs = sorted(iterrevs)
287 290
288 291 if repo.filecopiesmode == b'changeset-sidedata':
289 292 revinfo = _revinfo_getter(repo)
290 293 return _combine_changeset_copies(
291 294 revs, children, b.rev(), revinfo, match, isancestor
292 295 )
293 296 else:
294 297 revinfo = _revinfo_getter_extra(repo)
295 298 return _combine_changeset_copies_extra(
296 299 revs, children, b.rev(), revinfo, match, isancestor
297 300 )
298 301
299 302
300 303 def _combine_changeset_copies(
301 304 revs, children, targetrev, revinfo, match, isancestor
302 305 ):
303 306 """combine the copies information for each item of iterrevs
304 307
305 308 revs: sorted iterable of revision to visit
306 309 children: a {parent: [children]} mapping.
307 310 targetrev: the final copies destination revision (not in iterrevs)
308 311 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
309 312 match: a matcher
310 313
311 314 It returns the aggregated copies information for `targetrev`.
312 315 """
316
317 alwaysmatch = match.always()
318
319 if rustmod is not None and alwaysmatch:
320 return rustmod.combine_changeset_copies(
321 list(revs), children, targetrev, revinfo, isancestor
322 )
323
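    # Note: policy.importrust() returns None when the compiled Rust
    # extensions are unavailable, and the alwaysmatch guard keeps the Rust
    # path to the unfiltered case (the Rust entry point is not passed a
    # matcher), so the pure-Python code below remains the fallback for both
    # situations.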
313 324 all_copies = {}
314 alwaysmatch = match.always()
315 325 for r in revs:
316 326 copies = all_copies.pop(r, None)
317 327 if copies is None:
318 328 # this is a root
319 329 copies = {}
320 330 for i, c in enumerate(children[r]):
321 331 p1, p2, changes = revinfo(c)
322 332 childcopies = {}
323 333 if r == p1:
324 334 parent = 1
325 335 if changes is not None:
326 336 childcopies = changes.copied_from_p1
327 337 else:
328 338 assert r == p2
329 339 parent = 2
330 340 if changes is not None:
331 341 childcopies = changes.copied_from_p2
332 342 if not alwaysmatch:
333 343 childcopies = {
334 344 dst: src for dst, src in childcopies.items() if match(dst)
335 345 }
336 346 newcopies = copies
337 347 if childcopies:
338 348 newcopies = copies.copy()
339 349 for dest, source in pycompat.iteritems(childcopies):
340 350 prev = copies.get(source)
341 351 if prev is not None and prev[1] is not None:
342 352 source = prev[1]
343 353 newcopies[dest] = (c, source)
344 354 assert newcopies is not copies
345 355 if changes is not None:
346 356 for f in changes.removed:
347 357 if f in newcopies:
348 358 if newcopies is copies:
349 359 # copy on write to avoid affecting potential other
350 360 # branches. when there are no other branches, this
351 361 # could be avoided.
352 362 newcopies = copies.copy()
353 363 newcopies[f] = (c, None)
354 364 othercopies = all_copies.get(c)
355 365 if othercopies is None:
356 366 all_copies[c] = newcopies
357 367 else:
358 368 # we are the second parent to work on c; we need to merge our
359 369 # work with the other parent's.
360 370 #
361 371 # In case of conflict, parent 1 takes precedence over parent 2.
362 372 # This is an arbitrary choice made anew when implementing
363 373 # changeset-based copies. It was made without regard to
364 374 # potential filelog-related behavior.
365 375 if parent == 1:
366 376 _merge_copies_dict(
367 377 othercopies, newcopies, isancestor, changes
368 378 )
369 379 else:
370 380 _merge_copies_dict(
371 381 newcopies, othercopies, isancestor, changes
372 382 )
373 383 all_copies[c] = newcopies
374 384
375 385 final_copies = {}
376 386 for dest, (tt, source) in all_copies[targetrev].items():
377 387 if source is not None:
378 388 final_copies[dest] = source
379 389 return final_copies
380 390
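Per-destination values in `all_copies` are `(rev, source)` pairs, where a None source records that the copy was invalidated by a removal; the loop above drops those. In isolation:

    copies_at_target = {b'b': (5, b'a'), b'c': (7, None)}
    final = {d: s for d, (_, s) in copies_at_target.items() if s is not None}
    assert final == {b'b': b'a'}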
381 391
382 392 def _merge_copies_dict(minor, major, isancestor, changes):
383 393 """merge two copies-mapping together, minor and major
384 394
385 395 In case of conflict, value from "major" will be picked.
386 396
387 397 - `isancestor(low_rev, high_rev)`: a callable returning True if `low_rev` is
388 398 an ancestor of `high_rev`,
389 399
390 400 - `changes`: a ChangingFiles object for the current revision, consulted for
391 401 its `salvaged` and `merged` file sets,
392 402 """
393 403 for dest, value in major.items():
394 404 other = minor.get(dest)
395 405 if other is None:
396 406 minor[dest] = value
397 407 else:
398 408 new_tt = value[0]
399 409 other_tt = other[0]
400 410 if value[1] == other[1]:
401 411 continue
402 412 # content from "major" wins, unless it is older
403 413 # than the branch point or there is a merge
404 414 if new_tt == other_tt:
405 415 minor[dest] = value
406 416 elif (
407 417 changes is not None
408 418 and value[1] is None
409 419 and dest in changes.salvaged
410 420 ):
411 421 pass
412 422 elif (
413 423 changes is not None
414 424 and other[1] is None
415 425 and dest in changes.salvaged
416 426 ):
417 427 minor[dest] = value
418 428 elif changes is not None and dest in changes.merged:
419 429 minor[dest] = value
420 430 elif not isancestor(new_tt, other_tt):
421 431 if value[1] is not None:
422 432 minor[dest] = value
423 433 elif isancestor(other_tt, new_tt):
424 434 minor[dest] = value
425 435
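A standalone restatement of the simplest conflict case above, where both sides saw the copy at the same topological point but disagree on the source, so the entry from `major` overwrites the one in `minor` (the helper is illustrative, not the real call):

    def merge_simple(minor, major):
        for dest, value in major.items():
            other = minor.get(dest)
            if other is None or value[0] == other[0]:
                minor[dest] = value  # absent in minor, or same tt: major wins

    minor = {b'dst': (4, b'src_minor')}
    merge_simple(minor, {b'dst': (4, b'src_major')})
    assert minor == {b'dst': (4, b'src_major')}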
426 436
427 437 def _revinfo_getter_extra(repo):
428 438 """return a function that return multiple data given a <rev>"i
429 439
430 440 * p1: revision number of first parent
431 441 * p2: revision number of second parent
432 442 * p1copies: mapping of copies from p1
433 443 * p2copies: mapping of copies from p2
434 444 * removed: a list of removed files
435 445 * ismerged: a callback to know whether a file was merged in that revision
436 446 """
437 447 cl = repo.changelog
438 448 parents = cl.parentrevs
439 449
440 450 def get_ismerged(rev):
441 451 ctx = repo[rev]
442 452
443 453 def ismerged(path):
444 454 if path not in ctx.files():
445 455 return False
446 456 fctx = ctx[path]
447 457 parents = fctx._filelog.parents(fctx._filenode)
448 458 nb_parents = 0
449 459 for n in parents:
450 460 if n != node.nullid:
451 461 nb_parents += 1
452 462 return nb_parents >= 2
453 463
454 464 return ismerged
455 465
456 466 def revinfo(rev):
457 467 p1, p2 = parents(rev)
458 468 ctx = repo[rev]
459 469 p1copies, p2copies = ctx._copies
460 470 removed = ctx.filesremoved()
461 471 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
462 472
463 473 return revinfo
464 474
465 475
466 476 def _combine_changeset_copies_extra(
467 477 revs, children, targetrev, revinfo, match, isancestor
468 478 ):
469 479 """version of `_combine_changeset_copies` that works with the Google
470 480 specific "extra" based storage for copy information"""
471 481 all_copies = {}
472 482 alwaysmatch = match.always()
473 483 for r in revs:
474 484 copies = all_copies.pop(r, None)
475 485 if copies is None:
476 486 # this is a root
477 487 copies = {}
478 488 for i, c in enumerate(children[r]):
479 489 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
480 490 if r == p1:
481 491 parent = 1
482 492 childcopies = p1copies
483 493 else:
484 494 assert r == p2
485 495 parent = 2
486 496 childcopies = p2copies
487 497 if not alwaysmatch:
488 498 childcopies = {
489 499 dst: src for dst, src in childcopies.items() if match(dst)
490 500 }
491 501 newcopies = copies
492 502 if childcopies:
493 503 newcopies = copies.copy()
494 504 for dest, source in pycompat.iteritems(childcopies):
495 505 prev = copies.get(source)
496 506 if prev is not None and prev[1] is not None:
497 507 source = prev[1]
498 508 newcopies[dest] = (c, source)
499 509 assert newcopies is not copies
500 510 for f in removed:
501 511 if f in newcopies:
502 512 if newcopies is copies:
503 513 # copy on write to avoid affecting potential other
504 514 # branches. when there are no other branches, this
505 515 # could be avoided.
506 516 newcopies = copies.copy()
507 517 newcopies[f] = (c, None)
508 518 othercopies = all_copies.get(c)
509 519 if othercopies is None:
510 520 all_copies[c] = newcopies
511 521 else:
512 522 # we are the second parent to work on c; we need to merge our
513 523 # work with the other parent's.
514 524 #
515 525 # In case of conflict, parent 1 takes precedence over parent 2.
516 526 # This is an arbitrary choice made anew when implementing
517 527 # changeset-based copies. It was made without regard to
518 528 # potential filelog-related behavior.
519 529 if parent == 1:
520 530 _merge_copies_dict_extra(
521 531 othercopies, newcopies, isancestor, ismerged
522 532 )
523 533 else:
524 534 _merge_copies_dict_extra(
525 535 newcopies, othercopies, isancestor, ismerged
526 536 )
527 537 all_copies[c] = newcopies
528 538
529 539 final_copies = {}
530 540 for dest, (tt, source) in all_copies[targetrev].items():
531 541 if source is not None:
532 542 final_copies[dest] = source
533 543 return final_copies
534 544
535 545
536 546 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
537 547 """version of `_merge_copies_dict` that works with the Google
538 548 specific "extra" based storage for copy information"""
539 549 for dest, value in major.items():
540 550 other = minor.get(dest)
541 551 if other is None:
542 552 minor[dest] = value
543 553 else:
544 554 new_tt = value[0]
545 555 other_tt = other[0]
546 556 if value[1] == other[1]:
547 557 continue
548 558 # content from "major" wins, unless it is older
549 559 # than the branch point or there is a merge
550 560 if (
551 561 new_tt == other_tt
552 562 or not isancestor(new_tt, other_tt)
553 563 or ismerged(dest)
554 564 ):
555 565 minor[dest] = value
556 566
557 567
558 568 def _forwardcopies(a, b, base=None, match=None):
559 569 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
560 570
561 571 if base is None:
562 572 base = a
563 573 match = a.repo().narrowmatch(match)
564 574 # check for working copy
565 575 if b.rev() is None:
566 576 cm = _committedforwardcopies(a, b.p1(), base, match)
567 577 # combine copies from dirstate if necessary
568 578 copies = _chain(cm, _dirstatecopies(b._repo, match))
569 579 else:
570 580 copies = _committedforwardcopies(a, b, base, match)
571 581 return copies
572 582
573 583
574 584 def _backwardrenames(a, b, match):
575 585 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
576 586 return {}
577 587
578 588 # Even though we're not taking copies into account, 1:n rename situations
579 589 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
580 590 # arbitrarily pick one of the renames.
581 591 # We don't want to pass in "match" here, since that would filter
582 592 # the destination by it. Since we're reversing the copies, we want
583 593 # to filter the source instead.
584 594 f = _forwardcopies(b, a)
585 595 r = {}
586 596 for k, v in sorted(pycompat.iteritems(f)):
587 597 if match and not match(v):
588 598 continue
589 599 # remove copies
590 600 if v in a:
591 601 continue
592 602 r[v] = k
593 603 return r
594 604
595 605
596 606 def pathcopies(x, y, match=None):
597 607 """find {dst@y: src@x} copy mapping for directed compare"""
598 608 repo = x._repo
599 609 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
600 610 if debug:
601 611 repo.ui.debug(
602 612 b'debug.copies: searching copies from %s to %s\n' % (x, y)
603 613 )
604 614 if x == y or not x or not y:
605 615 return {}
606 616 if y.rev() is None and x == y.p1():
607 617 if debug:
608 618 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
609 619 # short-circuit to avoid issues with merge states
610 620 return _dirstatecopies(repo, match)
611 621 a = y.ancestor(x)
612 622 if a == x:
613 623 if debug:
614 624 repo.ui.debug(b'debug.copies: search mode: forward\n')
615 625 copies = _forwardcopies(x, y, match=match)
616 626 elif a == y:
617 627 if debug:
618 628 repo.ui.debug(b'debug.copies: search mode: backward\n')
619 629 copies = _backwardrenames(x, y, match=match)
620 630 else:
621 631 if debug:
622 632 repo.ui.debug(b'debug.copies: search mode: combined\n')
623 633 base = None
624 634 if a.rev() != node.nullrev:
625 635 base = x
626 636 copies = _chain(
627 637 _backwardrenames(x, a, match=match),
628 638 _forwardcopies(a, y, base, match=match),
629 639 )
630 640 _filter(x, y, copies)
631 641 return copies
632 642
633 643
634 644 def mergecopies(repo, c1, c2, base):
635 645 """
636 646 Finds moves and copies between context c1 and c2 that are relevant for
637 647 merging. 'base' will be used as the merge base.
638 648
639 649 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
640 650 files that were moved/copied in one merge parent and modified in another.
641 651 For example:
642 652
643 653 o ---> 4 another commit
644 654 |
645 655 | o ---> 3 commit that modifies a.txt
646 656 | /
647 657 o / ---> 2 commit that moves a.txt to b.txt
648 658 |/
649 659 o ---> 1 merge base
650 660
651 661 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
652 662 revision 4, and the user has copytrace disabled, we print the following
653 663 message:
654 664
655 665 ```other changed <file> which local deleted```
656 666
657 667 Returns a tuple where:
658 668
659 669 "branch_copies" an instance of branch_copies.
660 670
661 671 "diverge" is a mapping of source name -> list of destination names
662 672 for divergent renames.
663 673
664 674 This function calls different copytracing algorithms based on config.
665 675 """
666 676 # avoid silly behavior for update from empty dir
667 677 if not c1 or not c2 or c1 == c2:
668 678 return branch_copies(), branch_copies(), {}
669 679
670 680 narrowmatch = c1.repo().narrowmatch()
671 681
672 682 # avoid silly behavior for parent -> working dir
673 683 if c2.node() is None and c1.node() == repo.dirstate.p1():
674 684 return (
675 685 branch_copies(_dirstatecopies(repo, narrowmatch)),
676 686 branch_copies(),
677 687 {},
678 688 )
679 689
680 690 copytracing = repo.ui.config(b'experimental', b'copytrace')
681 691 if stringutil.parsebool(copytracing) is False:
682 692 # stringutil.parsebool() returns None when it is unable to parse the
683 693 # value, so we treat such cases as copytracing being on
684 694 return branch_copies(), branch_copies(), {}
685 695
686 696 if usechangesetcentricalgo(repo):
687 697 # The heuristics don't make sense when we need changeset-centric algos
688 698 return _fullcopytracing(repo, c1, c2, base)
689 699
690 700 # Copy trace disabling is explicitly below the node == p1 logic above
691 701 # because the logic above is required for a simple copy to be kept across a
692 702 # rebase.
693 703 if copytracing == b'heuristics':
694 704 # Do full copytracing if only non-public revisions are involved as
695 705 # that will be fast enough and will also cover the copies which could
696 706 # be missed by heuristics
697 707 if _isfullcopytraceable(repo, c1, base):
698 708 return _fullcopytracing(repo, c1, c2, base)
699 709 return _heuristicscopytracing(repo, c1, c2, base)
700 710 else:
701 711 return _fullcopytracing(repo, c1, c2, base)
702 712
703 713
704 714 def _isfullcopytraceable(repo, c1, base):
705 715 """Checks that if base, source and destination are all no-public branches,
706 716 if yes let's use the full copytrace algorithm for increased capabilities
707 717 since it will be fast enough.
708 718
709 719 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
710 720 number of changesets from c1 to base such that if the number of changesets
711 721 is more than the limit, the full copytracing algorithm won't be used.
712 722 """
713 723 if c1.rev() is None:
714 724 c1 = c1.p1()
715 725 if c1.mutable() and base.mutable():
716 726 sourcecommitlimit = repo.ui.configint(
717 727 b'experimental', b'copytrace.sourcecommitlimit'
718 728 )
719 729 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
720 730 return commits < sourcecommitlimit
721 731 return False
722 732
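The threshold is the ordinary config knob named in the docstring; for example, to allow full copytracing across more draft changesets (the value is illustrative):

    [experimental]
    copytrace.sourcecommitlimit = 200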
723 733
724 734 def _checksinglesidecopies(
725 735 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
726 736 ):
727 737 if src not in m2:
728 738 # deleted on side 2
729 739 if src not in m1:
730 740 # renamed on side 1, deleted on side 2
731 741 renamedelete[src] = dsts1
732 742 elif src not in mb:
733 743 # Work around the "short-circuit to avoid issues with merge states"
734 744 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
735 745 # destination doesn't exist in y.
736 746 pass
737 747 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
738 748 return
739 749 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
740 750 # modified on side 2
741 751 for dst in dsts1:
742 752 copy[dst] = src
743 753
744 754
745 755 class branch_copies(object):
746 756 """Information about copies made on one side of a merge/graft.
747 757
748 758 "copy" is a mapping from destination name -> source name,
749 759 where source is in c1 and destination is in c2 or vice-versa.
750 760
751 761 "movewithdir" is a mapping from source name -> destination name,
752 762 where the file at source present in one context but not the other
753 763 needs to be moved to destination by the merge process, because the
754 764 other context moved the directory it is in.
755 765
756 766 "renamedelete" is a mapping of source name -> list of destination
757 767 names for files deleted in c1 that were renamed in c2 or vice-versa.
758 768
759 769 "dirmove" is a mapping of detected source dir -> destination dir renames.
760 770 This is needed for handling changes to new files previously grafted into
761 771 renamed directories.
762 772 """
763 773
764 774 def __init__(
765 775 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
766 776 ):
767 777 self.copy = {} if copy is None else copy
768 778 self.renamedelete = {} if renamedelete is None else renamedelete
769 779 self.dirmove = {} if dirmove is None else dirmove
770 780 self.movewithdir = {} if movewithdir is None else movewithdir
771 781
772 782 def __repr__(self):
773 783 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
774 784 self.copy,
775 785 self.renamedelete,
776 786 self.dirmove,
777 787 self.movewithdir,
778 788 )
779 789
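A hypothetical instance, for a side that renamed one file and moved a directory; the two omitted mappings default to empty dicts:

    bc = branch_copies(copy={b'new.txt': b'old.txt'}, dirmove={b'src/': b'lib/'})
    assert bc.renamedelete == {} and bc.movewithdir == {}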
780 790
781 791 def _fullcopytracing(repo, c1, c2, base):
782 792 """The full copytracing algorithm which finds all the new files that were
783 793 added from merge base up to the top commit and for each file it checks if
784 794 this file was copied from another file.
785 795
786 796 This is pretty slow when a lot of changesets are involved but will track all
787 797 the copies.
788 798 """
789 799 m1 = c1.manifest()
790 800 m2 = c2.manifest()
791 801 mb = base.manifest()
792 802
793 803 copies1 = pathcopies(base, c1)
794 804 copies2 = pathcopies(base, c2)
795 805
796 806 if not (copies1 or copies2):
797 807 return branch_copies(), branch_copies(), {}
798 808
799 809 inversecopies1 = {}
800 810 inversecopies2 = {}
801 811 for dst, src in copies1.items():
802 812 inversecopies1.setdefault(src, []).append(dst)
803 813 for dst, src in copies2.items():
804 814 inversecopies2.setdefault(src, []).append(dst)
805 815
806 816 copy1 = {}
807 817 copy2 = {}
808 818 diverge = {}
809 819 renamedelete1 = {}
810 820 renamedelete2 = {}
811 821 allsources = set(inversecopies1) | set(inversecopies2)
812 822 for src in allsources:
813 823 dsts1 = inversecopies1.get(src)
814 824 dsts2 = inversecopies2.get(src)
815 825 if dsts1 and dsts2:
816 826 # copied/renamed on both sides
817 827 if src not in m1 and src not in m2:
818 828 # renamed on both sides
819 829 dsts1 = set(dsts1)
820 830 dsts2 = set(dsts2)
821 831 # If there's some overlap in the rename destinations, we
822 832 # consider it not divergent. For example, if side 1 copies 'a'
823 833 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
824 834 # and 'd' and deletes 'a'.
825 835 if dsts1 & dsts2:
826 836 for dst in dsts1 & dsts2:
827 837 copy1[dst] = src
828 838 copy2[dst] = src
829 839 else:
830 840 diverge[src] = sorted(dsts1 | dsts2)
831 841 elif src in m1 and src in m2:
832 842 # copied on both sides
833 843 dsts1 = set(dsts1)
834 844 dsts2 = set(dsts2)
835 845 for dst in dsts1 & dsts2:
836 846 copy1[dst] = src
837 847 copy2[dst] = src
838 848 # TODO: Handle cases where it was renamed on one side and copied
839 849 # on the other side
840 850 elif dsts1:
841 851 # copied/renamed only on side 1
842 852 _checksinglesidecopies(
843 853 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
844 854 )
845 855 elif dsts2:
846 856 # copied/renamed only on side 2
847 857 _checksinglesidecopies(
848 858 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
849 859 )
850 860
851 861 # find interesting file sets from manifests
852 862 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
853 863 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
854 864 u1 = sorted(addedinm1 - addedinm2)
855 865 u2 = sorted(addedinm2 - addedinm1)
856 866
857 867 header = b" unmatched files in %s"
858 868 if u1:
859 869 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
860 870 if u2:
861 871 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
862 872
863 873 if repo.ui.debugflag:
864 874 renamedeleteset = set()
865 875 divergeset = set()
866 876 for dsts in diverge.values():
867 877 divergeset.update(dsts)
868 878 for dsts in renamedelete1.values():
869 879 renamedeleteset.update(dsts)
870 880 for dsts in renamedelete2.values():
871 881 renamedeleteset.update(dsts)
872 882
873 883 repo.ui.debug(
874 884 b" all copies found (* = to merge, ! = divergent, "
875 885 b"% = renamed and deleted):\n"
876 886 )
877 887 for side, copies in ((b"local", copies1), (b"remote", copies2)):
878 888 if not copies:
879 889 continue
880 890 repo.ui.debug(b" on %s side:\n" % side)
881 891 for f in sorted(copies):
882 892 note = b""
883 893 if f in copy1 or f in copy2:
884 894 note += b"*"
885 895 if f in divergeset:
886 896 note += b"!"
887 897 if f in renamedeleteset:
888 898 note += b"%"
889 899 repo.ui.debug(
890 900 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
891 901 )
892 902 del renamedeleteset
893 903 del divergeset
894 904
895 905 repo.ui.debug(b" checking for directory renames\n")
896 906
897 907 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
898 908 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
899 909
900 910 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
901 911 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
902 912
903 913 return branch_copies1, branch_copies2, diverge
904 914
905 915
906 916 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
907 917 """Finds moved directories and files that should move with them.
908 918
909 919 ctx: the context for one of the sides
910 920 copy: files copied on the same side (as ctx)
911 921 fullcopy: files copied on the same side (as ctx), including those that
912 922 merge.manifestmerge() won't care about
913 923 addedfiles: added files on the other side (compared to ctx)
914 924 """
915 925 # generate a directory move map
916 926 d = ctx.dirs()
917 927 invalid = set()
918 928 dirmove = {}
919 929
920 930 # examine each file copy for a potential directory move, which is
921 931 # when all the files in a directory are moved to a new directory
922 932 for dst, src in pycompat.iteritems(fullcopy):
923 933 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
924 934 if dsrc in invalid:
925 935 # already seen to be uninteresting
926 936 continue
927 937 elif dsrc in d and ddst in d:
928 938 # directory wasn't entirely moved locally
929 939 invalid.add(dsrc)
930 940 elif dsrc in dirmove and dirmove[dsrc] != ddst:
931 941 # files from the same directory moved to two different places
932 942 invalid.add(dsrc)
933 943 else:
934 944 # looks good so far
935 945 dirmove[dsrc] = ddst
936 946
937 947 for i in invalid:
938 948 if i in dirmove:
939 949 del dirmove[i]
940 950 del d, invalid
941 951
942 952 if not dirmove:
943 953 return {}, {}
944 954
945 955 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
946 956
947 957 for d in dirmove:
948 958 repo.ui.debug(
949 959 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
950 960 )
951 961
952 962 movewithdir = {}
953 963 # check unaccounted nonoverlapping files against directory moves
954 964 for f in addedfiles:
955 965 if f not in fullcopy:
956 966 for d in dirmove:
957 967 if f.startswith(d):
958 968 # new file added in a directory that was moved, move it
959 969 df = dirmove[d] + f[len(d) :]
960 970 if df not in copy:
961 971 movewithdir[f] = df
962 972 repo.ui.debug(
963 973 b" pending file src: '%s' -> dst: '%s'\n"
964 974 % (f, df)
965 975 )
966 976 break
967 977
968 978 return dirmove, movewithdir
969 979
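The directory-move rule can be pictured with plain paths (standalone sketch; posixpath.dirname stands in for pathutil.dirname):

    import posixpath

    fullcopy = {b'lib/x.py': b'src/x.py', b'lib/y.py': b'src/y.py'}  # dst: src
    dirmove = {}
    for dst, src in fullcopy.items():
        dirmove[posixpath.dirname(src) + b'/'] = posixpath.dirname(dst) + b'/'
    # every copy left b'src/', so b'src/' maps to b'lib/'; a file added under
    # b'src/' on the other side is then rerouted:
    f = b'src/z.py'
    d = b'src/'
    assert dirmove == {b'src/': b'lib/'}
    assert dirmove[d] + f[len(d):] == b'lib/z.py'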
970 980
971 981 def _heuristicscopytracing(repo, c1, c2, base):
972 982 """Fast copytracing using filename heuristics
973 983
974 984 Assumes that moves or renames are of following two types:
975 985
976 986 1) Inside a directory only (same directory name but different filenames)
977 987 2) Move from one directory to another
978 988 (same filenames but different directory names)
979 989
980 990 Works only when there are no merge commits in the "source branch".
981 991 The source branch is the commits from base up to c2, not including base.
982 992
983 993 If a merge is involved it falls back to _fullcopytracing().
984 994
985 995 Can be used by setting the following config:
986 996
987 997 [experimental]
988 998 copytrace = heuristics
989 999
990 1000 In some cases the copy/move candidates found by heuristics can be very large
991 1001 in number and that will make the algorithm slow. The number of possible
992 1002 candidates to check can be limited by using the config
993 1003 `experimental.copytrace.movecandidateslimit` which defaults to 100.
994 1004 """
995 1005
996 1006 if c1.rev() is None:
997 1007 c1 = c1.p1()
998 1008 if c2.rev() is None:
999 1009 c2 = c2.p1()
1000 1010
1001 1011 changedfiles = set()
1002 1012 m1 = c1.manifest()
1003 1013 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1004 1014 # If base is not in c2 branch, we switch to fullcopytracing
1005 1015 repo.ui.debug(
1006 1016 b"switching to full copytracing as base is not "
1007 1017 b"an ancestor of c2\n"
1008 1018 )
1009 1019 return _fullcopytracing(repo, c1, c2, base)
1010 1020
1011 1021 ctx = c2
1012 1022 while ctx != base:
1013 1023 if len(ctx.parents()) == 2:
1014 1024 # To keep things simple let's not handle merges
1015 1025 repo.ui.debug(b"switching to full copytracing because of merges\n")
1016 1026 return _fullcopytracing(repo, c1, c2, base)
1017 1027 changedfiles.update(ctx.files())
1018 1028 ctx = ctx.p1()
1019 1029
1020 1030 copies2 = {}
1021 1031 cp = _forwardcopies(base, c2)
1022 1032 for dst, src in pycompat.iteritems(cp):
1023 1033 if src in m1:
1024 1034 copies2[dst] = src
1025 1035
1026 1036 # file is missing if it isn't present in the destination, but is present in
1027 1037 # the base and present in the source.
1028 1038 # Presence in the base is important to exclude added files, presence in the
1029 1039 # source is important to exclude removed files.
1030 1040 filt = lambda f: f not in m1 and f in base and f in c2
1031 1041 missingfiles = [f for f in changedfiles if filt(f)]
1032 1042
1033 1043 copies1 = {}
1034 1044 if missingfiles:
1035 1045 basenametofilename = collections.defaultdict(list)
1036 1046 dirnametofilename = collections.defaultdict(list)
1037 1047
1038 1048 for f in m1.filesnotin(base.manifest()):
1039 1049 basename = os.path.basename(f)
1040 1050 dirname = os.path.dirname(f)
1041 1051 basenametofilename[basename].append(f)
1042 1052 dirnametofilename[dirname].append(f)
1043 1053
1044 1054 for f in missingfiles:
1045 1055 basename = os.path.basename(f)
1046 1056 dirname = os.path.dirname(f)
1047 1057 samebasename = basenametofilename[basename]
1048 1058 samedirname = dirnametofilename[dirname]
1049 1059 movecandidates = samebasename + samedirname
1050 1060 # f is guaranteed to be present in c2, that's why
1051 1061 # c2.filectx(f) won't fail
1052 1062 f2 = c2.filectx(f)
1053 1063 # we can have a lot of candidates which can slow down the heuristics
1054 1064 # config value to limit the number of candidates moves to check
1055 1065 maxcandidates = repo.ui.configint(
1056 1066 b'experimental', b'copytrace.movecandidateslimit'
1057 1067 )
1058 1068
1059 1069 if len(movecandidates) > maxcandidates:
1060 1070 repo.ui.status(
1061 1071 _(
1062 1072 b"skipping copytracing for '%s', more "
1063 1073 b"candidates than the limit: %d\n"
1064 1074 )
1065 1075 % (f, len(movecandidates))
1066 1076 )
1067 1077 continue
1068 1078
1069 1079 for candidate in movecandidates:
1070 1080 f1 = c1.filectx(candidate)
1071 1081 if _related(f1, f2):
1072 1082 # if there are a few related copies then we'll merge
1073 1083 # changes into all of them. This matches the behaviour
1074 1084 # of upstream copytracing
1075 1085 copies1[candidate] = f
1076 1086
1077 1087 return branch_copies(copies1), branch_copies(copies2), {}
1078 1088
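The candidate generation above can be shown in isolation: files added on the c1 side that share a basename or a directory with a missing file become rename candidates (file names are hypothetical):

    import collections
    import os

    added_in_c1 = [b'src/util.py', b'docs/readme.txt']
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in added_in_c1:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)

    missing = b'lib/util.py'  # present in base and c2, gone from c1
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    assert candidates == [b'src/util.py']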
1079 1089
1080 1090 def _related(f1, f2):
1081 1091 """return True if f1 and f2 filectx have a common ancestor
1082 1092
1083 1093 Walk back to common ancestor to see if the two files originate
1084 1094 from the same file. Since workingfilectx's rev() is None it messes
1085 1095 up the integer comparison logic, hence the pre-step check for
1086 1096 None (f1 and f2 can only be workingfilectx's initially).
1087 1097 """
1088 1098
1089 1099 if f1 == f2:
1090 1100 return True # a match
1091 1101
1092 1102 g1, g2 = f1.ancestors(), f2.ancestors()
1093 1103 try:
1094 1104 f1r, f2r = f1.linkrev(), f2.linkrev()
1095 1105
1096 1106 if f1r is None:
1097 1107 f1 = next(g1)
1098 1108 if f2r is None:
1099 1109 f2 = next(g2)
1100 1110
1101 1111 while True:
1102 1112 f1r, f2r = f1.linkrev(), f2.linkrev()
1103 1113 if f1r > f2r:
1104 1114 f1 = next(g1)
1105 1115 elif f2r > f1r:
1106 1116 f2 = next(g2)
1107 1117 else: # f1 and f2 point to files in the same linkrev
1108 1118 return f1 == f2 # true if they point to the same file
1109 1119 except StopIteration:
1110 1120 return False
1111 1121
1112 1122
1113 1123 def graftcopies(wctx, ctx, base):
1114 1124 """reproduce copies between base and ctx in the wctx
1115 1125
1116 1126 Unlike mergecopies(), this function will only consider copies between base
1117 1127 and ctx; it will ignore copies between base and wctx. Also unlike
1118 1128 mergecopies(), this function will apply copies to the working copy (instead
1119 1129 of just returning information about the copies). That makes it cheaper
1120 1130 (especially in the common case of base==ctx.p1()) and useful also when
1121 1131 experimental.copytrace=off.
1122 1132
1123 1133 merge.update() will have already marked most copies, but it will only
1124 1134 mark copies if it thinks the source files are related (see
1125 1135 merge._related()). It will also not mark copies if the file wasn't modified
1126 1136 on the local side. This function adds the copies that were "missed"
1127 1137 by merge.update().
1128 1138 """
1129 1139 new_copies = pathcopies(base, ctx)
1130 1140 _filter(wctx.p1(), wctx, new_copies)
1131 1141 for dst, src in pycompat.iteritems(new_copies):
1132 1142 wctx[dst].markcopied(src)