##// END OF EJS Templates
copies: move `merged` testing sooner...
marmoute -
r46265:7990e7d9 default
parent child Browse files
Show More
@@ -1,1109 +1,1109 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 pycompat,
21 21 util,
22 22 )
23 23
24 24
25 25 from .utils import stringutil
26 26
27 27 from .revlogutils import flagutil
28 28
29 29
30 30 def _filter(src, dst, t):
31 31 """filters out invalid copies after chaining"""
32 32
33 33 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
34 34 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
35 35 # in the following table (not including trivial cases). For example, case 2
36 36 # is where a file existed in 'src' and remained under that name in 'mid' and
37 37 # then was renamed between 'mid' and 'dst'.
38 38 #
39 39 # case src mid dst result
40 40 # 1 x y - -
41 41 # 2 x y y x->y
42 42 # 3 x y x -
43 43 # 4 x y z x->z
44 44 # 5 - x y -
45 45 # 6 x x y x->y
46 46 #
47 47 # _chain() takes care of chaining the copies in 'a' and 'b', but it
48 48 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
49 49 # between 5 and 6, so it includes all cases in its result.
50 50 # Cases 1, 3, and 5 are then removed by _filter().
51 51
52 52 for k, v in list(t.items()):
53 53 # remove copies from files that didn't exist
54 54 if v not in src:
55 55 del t[k]
56 56 # remove criss-crossed copies
57 57 elif k in src and v in dst:
58 58 del t[k]
59 59 # remove copies to files that were then removed
60 60 elif k not in dst:
61 61 del t[k]
62 62
63 63
def _chain(prefix, suffix):
    """chain two sets of copies 'prefix' and 'suffix'"""
    chained = dict(prefix)
    # resolve each suffix copy through the prefix mapping when possible
    for dest, src in pycompat.iteritems(suffix):
        chained[dest] = prefix.get(src, src)
    return chained
70 70
71 71
72 72 def _tracefile(fctx, am, basemf):
73 73 """return file context that is the ancestor of fctx present in ancestor
74 74 manifest am
75 75
76 76 Note: we used to try and stop after a given limit, however checking if that
77 77 limit is reached turned out to be very expensive. we are better off
78 78 disabling that feature."""
79 79
80 80 for f in fctx.ancestors():
81 81 path = f.path()
82 82 if am.get(path, None) == f.filenode():
83 83 return path
84 84 if basemf and basemf.get(path, None) == f.filenode():
85 85 return path
86 86
87 87
88 88 def _dirstatecopies(repo, match=None):
89 89 ds = repo.dirstate
90 90 c = ds.copies().copy()
91 91 for k in list(c):
92 92 if ds[k] not in b'anm' or (match and not match(k)):
93 93 del c[k]
94 94 return c
95 95
96 96
97 97 def _computeforwardmissing(a, b, match=None):
98 98 """Computes which files are in b but not a.
99 99 This is its own function so extensions can easily wrap this call to see what
100 100 files _forwardcopies is about to process.
101 101 """
102 102 ma = a.manifest()
103 103 mb = b.manifest()
104 104 return mb.filesnotin(ma, match=match)
105 105
106 106
def usechangesetcentricalgo(repo):
    """Checks if we should use changeset-centric copy algorithms"""
    # sidedata storage implies the changeset-centric algorithms
    if repo.filecopiesmode == b'changeset-sidedata':
        return True
    readfrom = repo.ui.config(b'experimental', b'copies.read-from')
    return readfrom in (b'changeset-only', b'compatibility')
114 114
115 115
def _committedforwardcopies(a, b, base, match):
    """Like _forwardcopies(), but b.rev() cannot be None (working copy)

    Returns a {dst: src} mapping of copies from `a` to `b`. `base` (or `a`
    when None) provides an extra manifest in which copy sources may be
    looked up by _tracefile().
    """
    # files might have to be traced back to the fctx parent of the last
    # one-side-only changeset, but not further back than that
    repo = a._repo

    if usechangesetcentricalgo(repo):
        return _changesetforwardcopies(a, b, match)

    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    dbg = repo.ui.debug
    if debug:
        dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
    am = a.manifest()
    basemf = None if base is None else base.manifest()

    # find where new files came from
    # we currently don't try to find where old files went, too expensive
    # this means we can miss a case like 'hg rm b; hg cp a b'
    cm = {}

    # Computing the forward missing is quite expensive on large manifests, since
    # it compares the entire manifests. We can optimize it in the common use
    # case of computing what copies are in a commit versus its parent (like
    # during a rebase or histedit). Note, we exclude merge commits from this
    # optimization, since the ctx.files() for a merge commit is not correct for
    # this comparison.
    forwardmissingmatch = match
    if b.p1() == a and b.p2().node() == node.nullid:
        filesmatcher = matchmod.exact(b.files())
        forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
    missing = _computeforwardmissing(a, b, match=forwardmissingmatch)

    # shared ancestry context speeds up repeated ancestor walks below
    ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)

    if debug:
        dbg(b'debug.copies: missing files to search: %d\n' % len(missing))

    for f in sorted(missing):
        if debug:
            dbg(b'debug.copies: tracing file: %s\n' % f)
        fctx = b[f]
        fctx._ancestrycontext = ancestrycontext

        if debug:
            start = util.timer()
        opath = _tracefile(fctx, am, basemf)
        if opath:
            if debug:
                dbg(b'debug.copies: rename of: %s\n' % opath)
            cm[f] = opath
        if debug:
            dbg(
                b'debug.copies: time: %f seconds\n'
                % (util.timer() - start)
            )
    return cm
173 173
174 174
def _revinfo_getter(repo):
    """returns a function that returns the following data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * changes: a ChangingFiles object (or None when the revision carries no
      copy information)
    """
    cl = repo.changelog
    parents = cl.parentrevs
    flags = cl.flags

    HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO

    changelogrevision = cl.changelogrevision

    # A small cache to avoid doing the work twice for merges
    #
    # In the vast majority of cases, if we ask information for a revision
    # about 1 parent, we'll later ask it for the other. So it make sense to
    # keep the information around when reaching the first parent of a merge
    # and dropping it after it was provided for the second parents.
    #
    # There exist cases where only one parent of the merge will be walked. It
    # happens when the "destination" the copy tracing is descendant from a
    # new root, not common with the "source". In that case, we will only walk
    # through merge parents that are descendant of changesets common
    # between "source" and "destination".
    #
    # With the current case implementation if such changesets have a copy
    # information, we'll keep them in memory until the end of
    # _changesetforwardcopies. We don't expect the case to be frequent
    # enough to matter.
    #
    # In addition, it would be possible to reach pathological cases, where
    # many first parents are met before any second parent is reached. In
    # that case the cache could grow. If this even become an issue one can
    # safely introduce a maximum cache size. This would trade extra CPU/IO
    # time to save memory.
    merge_caches = {}

    def revinfo(rev):
        p1, p2 = parents(rev)
        value = None
        # pop: the cached entry is only ever needed by the second parent
        e = merge_caches.pop(rev, None)
        if e is not None:
            return e
        changes = None
        if flags(rev) & HASCOPIESINFO:
            changes = changelogrevision(rev).changes
        value = (p1, p2, changes)
        if p1 != node.nullrev and p2 != node.nullrev:
            # XXX some case we over cache, IGNORE
            merge_caches[rev] = value
        return value

    return revinfo
231 231
232 232
def _changesetforwardcopies(a, b, match):
    """find {dst@b: src@a} copies using changeset-stored copy information

    `a` is used as the "common" side and `b` as the head when walking the
    missing revisions between them; returns {} when there is nothing to
    trace.
    """
    if a.rev() in (node.nullrev, b.rev()):
        return {}

    repo = a.repo().unfiltered()
    # {parent_rev: [child_revs]} for the revisions we will walk
    children = {}

    cl = repo.changelog
    isancestor = cl.isancestorrev  # XXX we should add caching to this.
    missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
    mrset = set(missingrevs)
    roots = set()
    for r in missingrevs:
        for p in cl.parentrevs(r):
            if p == node.nullrev:
                continue
            if p not in children:
                children[p] = [r]
            else:
                children[p].append(r)
            # a parent outside the missing set is a root to start from
            if p not in mrset:
                roots.add(p)
    if not roots:
        # no common revision to track copies from
        return {}
    min_root = min(roots)

    from_head = set(
        cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
    )

    # visit only revisions on a path from a root to b, plus the roots
    iterrevs = set(from_head)
    iterrevs &= mrset
    iterrevs.update(roots)
    iterrevs.remove(b.rev())
    revs = sorted(iterrevs)

    if repo.filecopiesmode == b'changeset-sidedata':
        revinfo = _revinfo_getter(repo)
        return _combine_changeset_copies(
            revs, children, b.rev(), revinfo, match, isancestor
        )
    else:
        revinfo = _revinfo_getter_extra(repo)
        return _combine_changeset_copies_extra(
            revs, children, b.rev(), revinfo, match, isancestor
        )
280 280
281 281
def _combine_changeset_copies(
    revs, children, targetrev, revinfo, match, isancestor
):
    """combine the copies information for each item of iterrevs

    revs: sorted iterable of revision to visit
    children: a {parent: [children]} mapping.
    targetrev: the final copies destination revision (not in iterrevs)
    revinfo(rev): a function that returns (p1, p2, changes) for a revision
    match: a matcher

    It returns the aggregated copies information for `targetrev`.
    """
    # {rev: {dst: (rev_introducing_copy, src-or-None)}}; entries are popped
    # once consumed so the dict only holds pending work
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, changes = revinfo(c)
            childcopies = {}
            if r == p1:
                parent = 1
                if changes is not None:
                    childcopies = changes.copied_from_p1
            else:
                assert r == p2
                parent = 2
                if changes is not None:
                    childcopies = changes.copied_from_p2
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier copy of the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            if changes is not None:
                for f in changes.removed:
                    if f in newcopies:
                        if newcopies is copies:
                            # copy on write to avoid affecting potential other
                            # branches. when there are no other branches, this
                            # could be avoided.
                            newcopies = copies.copy()
                        # None marks "copy destination later removed"
                        newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict(
                        othercopies, newcopies, isancestor, changes
                    )
                else:
                    _merge_copies_dict(
                        newcopies, othercopies, isancestor, changes
                    )
                all_copies[c] = newcopies

    final_copies = {}
    # drop entries whose destination was ultimately removed (source is None)
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
362 362
363 363
364 364 def _merge_copies_dict(minor, major, isancestor, changes):
365 365 """merge two copies-mapping together, minor and major
366 366
367 367 In case of conflict, value from "major" will be picked.
368 368
369 369 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
370 370 ancestors of `high_rev`,
371 371
372 372 - `ismerged(path)`: callable return True if `path` have been merged in the
373 373 current revision,
374 374 """
375 375 for dest, value in major.items():
376 376 other = minor.get(dest)
377 377 if other is None:
378 378 minor[dest] = value
379 379 else:
380 380 new_tt = value[0]
381 381 other_tt = other[0]
382 382 if value[1] == other[1]:
383 383 continue
384 384 # content from "major" wins, unless it is older
385 385 # than the branch point or there is a merge
386 386 if new_tt == other_tt:
387 387 minor[dest] = value
388 388 elif (
389 389 changes is not None
390 390 and value[1] is None
391 391 and dest in changes.salvaged
392 392 ):
393 393 pass
394 394 elif (
395 395 changes is not None
396 396 and other[1] is None
397 397 and dest in changes.salvaged
398 398 ):
399 399 minor[dest] = value
400 elif not isancestor(new_tt, other_tt):
400 elif changes is not None and dest in changes.merged:
401 401 minor[dest] = value
402 elif changes is not None and dest in changes.merged:
402 elif not isancestor(new_tt, other_tt):
403 403 minor[dest] = value
404 404
405 405
def _revinfo_getter_extra(repo):
    """return a function that return multiple data given a <rev>

    * p1: revision number of first parent
    * p2: revision number of second parent
    * p1copies: mapping of copies from p1
    * p2copies: mapping of copies from p2
    * removed: a list of removed files
    * ismerged: a callback to know if file was merged in that revision
    """
    cl = repo.changelog
    parents = cl.parentrevs

    def get_ismerged(rev):
        # build a per-revision callable reporting whether a path has two
        # non-null filelog parents in that revision (i.e. was merged there)
        ctx = repo[rev]

        def ismerged(path):
            if path not in ctx.files():
                return False
            fctx = ctx[path]
            parents = fctx._filelog.parents(fctx._filenode)
            nb_parents = 0
            for n in parents:
                if n != node.nullid:
                    nb_parents += 1
            return nb_parents >= 2

        return ismerged

    def revinfo(rev):
        p1, p2 = parents(rev)
        ctx = repo[rev]
        p1copies, p2copies = ctx._copies
        removed = ctx.filesremoved()
        return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)

    return revinfo
443 443
444 444
def _combine_changeset_copies_extra(
    revs, children, targetrev, revinfo, match, isancestor
):
    """version of `_combine_changeset_copies` that works with the Google
    specific "extra" based storage for copy information

    `revinfo(rev)` here returns (p1, p2, p1copies, p2copies, removed,
    ismerged) instead of a ChangingFiles object.
    """
    # {rev: {dst: (rev_introducing_copy, src-or-None)}}; popped once consumed
    all_copies = {}
    alwaysmatch = match.always()
    for r in revs:
        copies = all_copies.pop(r, None)
        if copies is None:
            # this is a root
            copies = {}
        for i, c in enumerate(children[r]):
            p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
            if r == p1:
                parent = 1
                childcopies = p1copies
            else:
                assert r == p2
                parent = 2
                childcopies = p2copies
            if not alwaysmatch:
                childcopies = {
                    dst: src for dst, src in childcopies.items() if match(dst)
                }
            newcopies = copies
            if childcopies:
                newcopies = copies.copy()
                for dest, source in pycompat.iteritems(childcopies):
                    # chain through an earlier copy of the source, if any
                    prev = copies.get(source)
                    if prev is not None and prev[1] is not None:
                        source = prev[1]
                    newcopies[dest] = (c, source)
                assert newcopies is not copies
            for f in removed:
                if f in newcopies:
                    if newcopies is copies:
                        # copy on write to avoid affecting potential other
                        # branches. when there are no other branches, this
                        # could be avoided.
                        newcopies = copies.copy()
                    # None marks "copy destination later removed"
                    newcopies[f] = (c, None)
            othercopies = all_copies.get(c)
            if othercopies is None:
                all_copies[c] = newcopies
            else:
                # we are the second parent to work on c, we need to merge our
                # work with the other.
                #
                # In case of conflict, parent 1 take precedence over parent 2.
                # This is an arbitrary choice made anew when implementing
                # changeset based copies. It was made without regards with
                # potential filelog related behavior.
                if parent == 1:
                    _merge_copies_dict_extra(
                        othercopies, newcopies, isancestor, ismerged
                    )
                else:
                    _merge_copies_dict_extra(
                        newcopies, othercopies, isancestor, ismerged
                    )
                all_copies[c] = newcopies

    final_copies = {}
    # drop entries whose destination was ultimately removed (source is None)
    for dest, (tt, source) in all_copies[targetrev].items():
        if source is not None:
            final_copies[dest] = source
    return final_copies
513 513
514 514
515 515 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
516 516 """version of `_merge_copies_dict` that works with the Google
517 517 specific "extra" based storage for copy information"""
518 518 for dest, value in major.items():
519 519 other = minor.get(dest)
520 520 if other is None:
521 521 minor[dest] = value
522 522 else:
523 523 new_tt = value[0]
524 524 other_tt = other[0]
525 525 if value[1] == other[1]:
526 526 continue
527 527 # content from "major" wins, unless it is older
528 528 # than the branch point or there is a merge
529 529 if (
530 530 new_tt == other_tt
531 531 or not isancestor(new_tt, other_tt)
532 532 or ismerged(dest)
533 533 ):
534 534 minor[dest] = value
535 535
536 536
def _forwardcopies(a, b, base=None, match=None):
    """find {dst@b: src@a} copy mapping where a is an ancestor of b"""

    if base is None:
        base = a
    match = a.repo().narrowmatch(match)
    if b.rev() is not None:
        return _committedforwardcopies(a, b, base, match)
    # working copy: trace copies up to the first parent, then layer the
    # copies recorded in the dirstate on top of them
    committed = _committedforwardcopies(a, b.p1(), base, match)
    return _chain(committed, _dirstatecopies(b._repo, match))
551 551
552 552
def _backwardrenames(a, b, match):
    """find renames from b back to its ancestor a, as a {src@b: dst@a} map"""
    if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
        return {}

    # Even though we're not taking copies into account, 1:n rename situations
    # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
    # arbitrarily pick one of the renames.
    # We don't want to pass in "match" here, since that would filter
    # the destination by it. Since we're reversing the copies, we want
    # to filter the source instead.
    forward = _forwardcopies(b, a)
    reversed_copies = {}
    for dest, src in sorted(pycompat.iteritems(forward)):
        if match and not match(src):
            continue
        # remove copies: if the source still exists in `a`, it was a copy,
        # not a rename
        if src in a:
            continue
        reversed_copies[src] = dest
    return reversed_copies
573 573
574 574
def pathcopies(x, y, match=None):
    """find {dst@y: src@x} copy mapping for directed compare

    Dispatches to a forward, backward, or combined (backward then forward
    through the common ancestor) search depending on how x and y relate.
    """
    repo = x._repo
    debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
    if debug:
        repo.ui.debug(
            b'debug.copies: searching copies from %s to %s\n' % (x, y)
        )
    if x == y or not x or not y:
        return {}
    if y.rev() is None and x == y.p1():
        if debug:
            repo.ui.debug(b'debug.copies: search mode: dirstate\n')
        # short-circuit to avoid issues with merge states
        return _dirstatecopies(repo, match)
    a = y.ancestor(x)
    if a == x:
        # x is an ancestor of y: simple forward trace
        if debug:
            repo.ui.debug(b'debug.copies: search mode: forward\n')
        copies = _forwardcopies(x, y, match=match)
    elif a == y:
        # y is an ancestor of x: trace renames backward
        if debug:
            repo.ui.debug(b'debug.copies: search mode: backward\n')
        copies = _backwardrenames(x, y, match=match)
    else:
        # unrelated branches: go back to the ancestor, then forward to y
        if debug:
            repo.ui.debug(b'debug.copies: search mode: combined\n')
        base = None
        if a.rev() != node.nullrev:
            base = x
        copies = _chain(
            _backwardrenames(x, a, match=match),
            _forwardcopies(a, y, base, match=match),
        )
    _filter(x, y, copies)
    return copies
611 611
612 612
def mergecopies(repo, c1, c2, base):
    """
    Finds moves and copies between context c1 and c2 that are relevant for
    merging. 'base' will be used as the merge base.

    Copytracing is used in commands like rebase, merge, unshelve, etc to merge
    files that were moved/ copied in one merge parent and modified in another.
    For example:

    o          ---> 4 another commit
    |
    |   o      ---> 3 commit that modifies a.txt
    |  /
    o /        ---> 2 commit that moves a.txt to b.txt
    |/
    o          ---> 1 merge base

    If we try to rebase revision 3 on revision 4, since there is no a.txt in
    revision 4, and if the user has copytrace disabled, we print the following
    message:

    ```other changed <file> which local deleted```

    Returns a tuple where:

    "branch_copies" an instance of branch_copies.

    "diverge" is a mapping of source name -> list of destination names
    for divergent renames.

    This function calls different copytracing algorithms based on config.
    """
    # avoid silly behavior for update from empty dir
    if not c1 or not c2 or c1 == c2:
        return branch_copies(), branch_copies(), {}

    narrowmatch = c1.repo().narrowmatch()

    # avoid silly behavior for parent -> working dir
    if c2.node() is None and c1.node() == repo.dirstate.p1():
        return (
            branch_copies(_dirstatecopies(repo, narrowmatch)),
            branch_copies(),
            {},
        )

    copytracing = repo.ui.config(b'experimental', b'copytrace')
    if stringutil.parsebool(copytracing) is False:
        # stringutil.parsebool() returns None when it is unable to parse the
        # value, so we should rely on making sure copytracing is on such cases
        return branch_copies(), branch_copies(), {}

    if usechangesetcentricalgo(repo):
        # The heuristics don't make sense when we need changeset-centric algos
        return _fullcopytracing(repo, c1, c2, base)

    # Copy trace disabling is explicitly below the node == p1 logic above
    # because the logic above is required for a simple copy to be kept across a
    # rebase.
    if copytracing == b'heuristics':
        # Do full copytracing if only non-public revisions are involved as
        # that will be fast enough and will also cover the copies which could
        # be missed by heuristics
        if _isfullcopytraceable(repo, c1, base):
            return _fullcopytracing(repo, c1, c2, base)
        return _heuristicscopytracing(repo, c1, c2, base)
    else:
        return _fullcopytracing(repo, c1, c2, base)
681 681
682 682
683 683 def _isfullcopytraceable(repo, c1, base):
684 684 """ Checks that if base, source and destination are all no-public branches,
685 685 if yes let's use the full copytrace algorithm for increased capabilities
686 686 since it will be fast enough.
687 687
688 688 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
689 689 number of changesets from c1 to base such that if number of changesets are
690 690 more than the limit, full copytracing algorithm won't be used.
691 691 """
692 692 if c1.rev() is None:
693 693 c1 = c1.p1()
694 694 if c1.mutable() and base.mutable():
695 695 sourcecommitlimit = repo.ui.configint(
696 696 b'experimental', b'copytrace.sourcecommitlimit'
697 697 )
698 698 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
699 699 return commits < sourcecommitlimit
700 700 return False
701 701
702 702
def _checksinglesidecopies(
    src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
):
    """handle a source copied/renamed on only one side of a merge

    Mutates `copy` (dst -> src) and `renamedelete` (src -> dsts) in place;
    m1/m2/mb are the manifests of the copying side, other side, and base.
    """
    if src not in m2:
        # deleted on side 2
        if src not in m1:
            # renamed on side 1, deleted on side 2
            renamedelete[src] = dsts1
    elif src not in mb:
        # Work around the "short-circuit to avoid issues with merge states"
        # thing in pathcopies(): pathcopies(x, y) can return a copy where the
        # destination doesn't exist in y.
        pass
    elif mb[src] != m2[src] and not _related(c2[src], base[src]):
        # modified on side 2 but unrelated to the base version: bail out
        return
    elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
        # modified on side 2
        for dst in dsts1:
            copy[dst] = src
722 722
723 723
class branch_copies(object):
    """Information about copies made on one side of a merge/graft.

    "copy" is a mapping from destination name -> source name,
    where source is in c1 and destination is in c2 or vice-versa.

    "movewithdir" is a mapping from source name -> destination name,
    where the file at source present in one context but not the other
    needs to be moved to destination by the merge process, because the
    other context moved the directory it is in.

    "renamedelete" is a mapping of source name -> list of destination
    names for files deleted in c1 that were renamed in c2 or vice-versa.

    "dirmove" is a mapping of detected source dir -> destination dir renames.
    This is needed for handling changes to new files previously grafted into
    renamed directories.
    """

    def __init__(
        self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
    ):
        # every attribute defaults to a fresh empty mapping
        self.copy = copy if copy is not None else {}
        self.renamedelete = renamedelete if renamedelete is not None else {}
        self.dirmove = dirmove if dirmove is not None else {}
        self.movewithdir = movewithdir if movewithdir is not None else {}

    def __repr__(self):
        return (
            '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>'
            % (self.copy, self.renamedelete, self.dirmove, self.movewithdir,)
        )
756 756
757 757
def _fullcopytracing(repo, c1, c2, base):
    """ The full copytracing algorithm which finds all the new files that were
    added from merge base up to the top commit and for each file it checks if
    this file was copied from another file.

    This is pretty slow when a lot of changesets are involved but will track all
    the copies.

    Returns a (branch_copies, branch_copies, diverge) tuple, matching the
    mergecopies() contract.
    """
    m1 = c1.manifest()
    m2 = c2.manifest()
    mb = base.manifest()

    copies1 = pathcopies(base, c1)
    copies2 = pathcopies(base, c2)

    if not (copies1 or copies2):
        return branch_copies(), branch_copies(), {}

    # invert the copy mappings: source -> [destinations]
    inversecopies1 = {}
    inversecopies2 = {}
    for dst, src in copies1.items():
        inversecopies1.setdefault(src, []).append(dst)
    for dst, src in copies2.items():
        inversecopies2.setdefault(src, []).append(dst)

    copy1 = {}
    copy2 = {}
    diverge = {}
    renamedelete1 = {}
    renamedelete2 = {}
    allsources = set(inversecopies1) | set(inversecopies2)
    for src in allsources:
        dsts1 = inversecopies1.get(src)
        dsts2 = inversecopies2.get(src)
        if dsts1 and dsts2:
            # copied/renamed on both sides
            if src not in m1 and src not in m2:
                # renamed on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                # If there's some overlap in the rename destinations, we
                # consider it not divergent. For example, if side 1 copies 'a'
                # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
                # and 'd' and deletes 'a'.
                if dsts1 & dsts2:
                    for dst in dsts1 & dsts2:
                        copy1[dst] = src
                        copy2[dst] = src
                else:
                    diverge[src] = sorted(dsts1 | dsts2)
            elif src in m1 and src in m2:
                # copied on both sides
                dsts1 = set(dsts1)
                dsts2 = set(dsts2)
                for dst in dsts1 & dsts2:
                    copy1[dst] = src
                    copy2[dst] = src
                # TODO: Handle cases where it was renamed on one side and copied
                # on the other side
        elif dsts1:
            # copied/renamed only on side 1
            _checksinglesidecopies(
                src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
            )
        elif dsts2:
            # copied/renamed only on side 2
            _checksinglesidecopies(
                src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
            )

    # find interesting file sets from manifests
    addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
    addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
    u1 = sorted(addedinm1 - addedinm2)
    u2 = sorted(addedinm2 - addedinm1)

    header = b" unmatched files in %s"
    if u1:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
    if u2:
        repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))

    if repo.ui.debugflag:
        # debug output: annotate every discovered copy with its category
        renamedeleteset = set()
        divergeset = set()
        for dsts in diverge.values():
            divergeset.update(dsts)
        for dsts in renamedelete1.values():
            renamedeleteset.update(dsts)
        for dsts in renamedelete2.values():
            renamedeleteset.update(dsts)

        repo.ui.debug(
            b" all copies found (* = to merge, ! = divergent, "
            b"% = renamed and deleted):\n"
        )
        for side, copies in ((b"local", copies1), (b"remote", copies2)):
            if not copies:
                continue
            repo.ui.debug(b" on %s side:\n" % side)
            for f in sorted(copies):
                note = b""
                if f in copy1 or f in copy2:
                    note += b"*"
                if f in divergeset:
                    note += b"!"
                if f in renamedeleteset:
                    note += b"%"
                repo.ui.debug(
                    b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
                )
        del renamedeleteset
        del divergeset

    repo.ui.debug(b" checking for directory renames\n")

    dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
    dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)

    branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
    branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)

    return branch_copies1, branch_copies2, diverge
881 881
882 882
def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
    """Finds moved directories and files that should move with them.

    ctx: the context for one of the sides
    copy: files copied on the same side (as ctx)
    fullcopy: files copied on the same side (as ctx), including those that
    merge.manifestmerge() won't care about
    addedfiles: added files on the other side (compared to ctx)

    Returns a (dirmove, movewithdir) pair of mappings.
    """
    # generate a directory move map
    d = ctx.dirs()
    invalid = set()
    dirmove = {}

    # examine each file copy for a potential directory move, which is
    # when all the files in a directory are moved to a new directory
    for dst, src in pycompat.iteritems(fullcopy):
        dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
        if dsrc in invalid:
            # already seen to be uninteresting
            continue
        elif dsrc in d and ddst in d:
            # directory wasn't entirely moved locally
            invalid.add(dsrc)
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            # files from the same directory moved to two different places
            invalid.add(dsrc)
        else:
            # looks good so far
            dirmove[dsrc] = ddst

    for i in invalid:
        if i in dirmove:
            del dirmove[i]
    del d, invalid

    if not dirmove:
        return {}, {}

    # normalize to trailing-slash form so prefix matching below is exact
    dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}

    for d in dirmove:
        repo.ui.debug(
            b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
        )

    movewithdir = {}
    # check unaccounted nonoverlapping files against directory moves
    for f in addedfiles:
        if f not in fullcopy:
            for d in dirmove:
                if f.startswith(d):
                    # new file added in a directory that was moved, move it
                    df = dirmove[d] + f[len(d) :]
                    if df not in copy:
                        movewithdir[f] = df
                        repo.ui.debug(
                            b" pending file src: '%s' -> dst: '%s'\n"
                            % (f, df)
                        )
                    break

    return dirmove, movewithdir
946 946
947 947
def _heuristicscopytracing(repo, c1, c2, base):
    """Fast copytracing using filename heuristics

    Assumes that moves or renames are of following two types:

    1) Inside a directory only (same directory name but different filenames)
    2) Move from one directory to another
       (same filenames but different directory names)

    Works only when there are no merge commits in the "source branch".
    Source branch is commits from base up to c2 not including base.

    If merge is involved it fallbacks to _fullcopytracing().

    Can be used by setting the following config:

        [experimental]
        copytrace = heuristics

    In some cases the copy/move candidates found by heuristics can be very large
    in number and that will make the algorithm slow. The number of possible
    candidates to check can be limited by using the config
    `experimental.copytrace.movecandidateslimit` which defaults to 100.
    """

    # working contexts have rev() None; operate on their first parent
    if c1.rev() is None:
        c1 = c1.p1()
    if c2.rev() is None:
        c2 = c2.p1()

    changedfiles = set()
    m1 = c1.manifest()
    if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
        # If base is not in c2 branch, we switch to fullcopytracing
        repo.ui.debug(
            b"switching to full copytracing as base is not "
            b"an ancestor of c2\n"
        )
        return _fullcopytracing(repo, c1, c2, base)

    # collect the files touched on the source branch (base..c2], giving
    # up in favor of full copytracing as soon as a merge commit shows up
    ctx = c2
    while ctx != base:
        if len(ctx.parents()) == 2:
            # To keep things simple let's not handle merges
            repo.ui.debug(b"switching to full copytracing because of merges\n")
            return _fullcopytracing(repo, c1, c2, base)
        changedfiles.update(ctx.files())
        ctx = ctx.p1()

    # copies from base to c2 can be reported directly when their source
    # file still exists in c1's manifest
    copies2 = {}
    cp = _forwardcopies(base, c2)
    for dst, src in pycompat.iteritems(cp):
        if src in m1:
            copies2[dst] = src

    # file is missing if it isn't present in the destination, but is present in
    # the base and present in the source.
    # Presence in the base is important to exclude added files, presence in the
    # source is important to exclude removed files.
    missingfiles = [
        f for f in changedfiles if f not in m1 and f in base and f in c2
    ]

    copies1 = {}
    if missingfiles:
        # index the files added in c1 (relative to base) by basename and
        # by directory name so rename candidates can be found quickly
        basenametofilename = collections.defaultdict(list)
        dirnametofilename = collections.defaultdict(list)

        for f in m1.filesnotin(base.manifest()):
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            basenametofilename[basename].append(f)
            dirnametofilename[dirname].append(f)

        # we can have a lot of candidates which can slow down the heuristics
        # config value to limit the number of candidates moves to check
        # (loop-invariant: read once rather than once per missing file)
        maxcandidates = repo.ui.configint(
            b'experimental', b'copytrace.movecandidateslimit'
        )

        for f in missingfiles:
            basename = os.path.basename(f)
            dirname = os.path.dirname(f)
            samebasename = basenametofilename[basename]
            samedirname = dirnametofilename[dirname]
            movecandidates = samebasename + samedirname
            # f is guaranteed to be present in c2, that's why
            # c2.filectx(f) won't fail
            f2 = c2.filectx(f)

            if len(movecandidates) > maxcandidates:
                repo.ui.status(
                    _(
                        b"skipping copytracing for '%s', more "
                        b"candidates than the limit: %d\n"
                    )
                    % (f, len(movecandidates))
                )
                continue

            for candidate in movecandidates:
                f1 = c1.filectx(candidate)
                if _related(f1, f2):
                    # if there are a few related copies then we'll merge
                    # changes into all of them. This matches the behaviour
                    # of upstream copytracing
                    copies1[candidate] = f

    return branch_copies(copies1), branch_copies(copies2), {}
1055 1055
1056 1056
1057 1057 def _related(f1, f2):
1058 1058 """return True if f1 and f2 filectx have a common ancestor
1059 1059
1060 1060 Walk back to common ancestor to see if the two files originate
1061 1061 from the same file. Since workingfilectx's rev() is None it messes
1062 1062 up the integer comparison logic, hence the pre-step check for
1063 1063 None (f1 and f2 can only be workingfilectx's initially).
1064 1064 """
1065 1065
1066 1066 if f1 == f2:
1067 1067 return True # a match
1068 1068
1069 1069 g1, g2 = f1.ancestors(), f2.ancestors()
1070 1070 try:
1071 1071 f1r, f2r = f1.linkrev(), f2.linkrev()
1072 1072
1073 1073 if f1r is None:
1074 1074 f1 = next(g1)
1075 1075 if f2r is None:
1076 1076 f2 = next(g2)
1077 1077
1078 1078 while True:
1079 1079 f1r, f2r = f1.linkrev(), f2.linkrev()
1080 1080 if f1r > f2r:
1081 1081 f1 = next(g1)
1082 1082 elif f2r > f1r:
1083 1083 f2 = next(g2)
1084 1084 else: # f1 and f2 point to files in the same linkrev
1085 1085 return f1 == f2 # true if they point to the same file
1086 1086 except StopIteration:
1087 1087 return False
1088 1088
1089 1089
def graftcopies(wctx, ctx, base):
    """reproduce copies between base and ctx in the wctx

    Unlike mergecopies(), this function will only consider copies between base
    and ctx; it will ignore copies between base and wctx. Also unlike
    mergecopies(), this function will apply copies to the working copy (instead
    of just returning information about the copies). That makes it cheaper
    (especially in the common case of base==ctx.p1()) and useful also when
    experimental.copytrace=off.

    merge.update() will have already marked most copies, but it will only
    mark copies if it thinks the source files are related (see
    merge._related()). It will also not mark copies if the file wasn't modified
    on the local side. This function adds the copies that were "missed"
    by merge.update().
    """
    # collect copies between base and ctx, then discard those that are
    # invalid relative to the working copy and its first parent
    found = pathcopies(base, ctx)
    _filter(wctx.p1(), wctx, found)
    # record every surviving copy directly in the working context
    for destination, source in pycompat.iteritems(found):
        wctx[destination].markcopied(source)
General Comments 0
You need to be logged in to leave comments. Login now