##// END OF EJS Templates
copies-rust: move is_ancestor caching within the rust code...
marmoute -
r46586:8b99c473 default
parent child Browse files
Show More
@@ -1,1142 +1,1144 b''
1 1 # copies.py - copy detection for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import os
12 12
13 13 from .i18n import _
14 14
15 15
16 16 from . import (
17 17 match as matchmod,
18 18 node,
19 19 pathutil,
20 20 policy,
21 21 pycompat,
22 22 util,
23 23 )
24 24
25 25
26 26 from .utils import stringutil
27 27
28 28 from .revlogutils import flagutil
29 29
30 30 rustmod = policy.importrust("copy_tracing")
31 31
32 32
33 33 def _filter(src, dst, t):
34 34 """filters out invalid copies after chaining"""
35 35
36 36 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
37 37 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
38 38 # in the following table (not including trivial cases). For example, case 2
39 39 # is where a file existed in 'src' and remained under that name in 'mid' and
40 40 # then was renamed between 'mid' and 'dst'.
41 41 #
42 42 # case src mid dst result
43 43 # 1 x y - -
44 44 # 2 x y y x->y
45 45 # 3 x y x -
46 46 # 4 x y z x->z
47 47 # 5 - x y -
48 48 # 6 x x y x->y
49 49 #
50 50 # _chain() takes care of chaining the copies in 'a' and 'b', but it
51 51 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
52 52 # between 5 and 6, so it includes all cases in its result.
53 53 # Cases 1, 3, and 5 are then removed by _filter().
54 54
55 55 for k, v in list(t.items()):
56 56 # remove copies from files that didn't exist
57 57 if v not in src:
58 58 del t[k]
59 59 # remove criss-crossed copies
60 60 elif k in src and v in dst:
61 61 del t[k]
62 62 # remove copies to files that were then removed
63 63 elif k not in dst:
64 64 del t[k]
65 65
66 66
67 67 def _chain(prefix, suffix):
68 68 """chain two sets of copies 'prefix' and 'suffix'"""
69 69 result = prefix.copy()
70 70 for key, value in pycompat.iteritems(suffix):
71 71 result[key] = prefix.get(value, value)
72 72 return result
73 73
74 74
75 75 def _tracefile(fctx, am, basemf):
76 76 """return file context that is the ancestor of fctx present in ancestor
77 77 manifest am
78 78
79 79 Note: we used to try and stop after a given limit, however checking if that
80 80 limit is reached turned out to be very expensive. we are better off
81 81 disabling that feature."""
82 82
83 83 for f in fctx.ancestors():
84 84 path = f.path()
85 85 if am.get(path, None) == f.filenode():
86 86 return path
87 87 if basemf and basemf.get(path, None) == f.filenode():
88 88 return path
89 89
90 90
91 91 def _dirstatecopies(repo, match=None):
92 92 ds = repo.dirstate
93 93 c = ds.copies().copy()
94 94 for k in list(c):
95 95 if ds[k] not in b'anm' or (match and not match(k)):
96 96 del c[k]
97 97 return c
98 98
99 99
100 100 def _computeforwardmissing(a, b, match=None):
101 101 """Computes which files are in b but not a.
102 102 This is its own function so extensions can easily wrap this call to see what
103 103 files _forwardcopies is about to process.
104 104 """
105 105 ma = a.manifest()
106 106 mb = b.manifest()
107 107 return mb.filesnotin(ma, match=match)
108 108
109 109
110 110 def usechangesetcentricalgo(repo):
111 111 """Checks if we should use changeset-centric copy algorithms"""
112 112 if repo.filecopiesmode == b'changeset-sidedata':
113 113 return True
114 114 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
115 115 changesetsource = (b'changeset-only', b'compatibility')
116 116 return readfrom in changesetsource
117 117
118 118
119 119 def _committedforwardcopies(a, b, base, match):
120 120 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
121 121 # files might have to be traced back to the fctx parent of the last
122 122 # one-side-only changeset, but not further back than that
123 123 repo = a._repo
124 124
125 125 if usechangesetcentricalgo(repo):
126 126 return _changesetforwardcopies(a, b, match)
127 127
128 128 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
129 129 dbg = repo.ui.debug
130 130 if debug:
131 131 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
132 132 am = a.manifest()
133 133 basemf = None if base is None else base.manifest()
134 134
135 135 # find where new files came from
136 136 # we currently don't try to find where old files went, too expensive
137 137 # this means we can miss a case like 'hg rm b; hg cp a b'
138 138 cm = {}
139 139
140 140 # Computing the forward missing is quite expensive on large manifests, since
141 141 # it compares the entire manifests. We can optimize it in the common use
142 142 # case of computing what copies are in a commit versus its parent (like
143 143 # during a rebase or histedit). Note, we exclude merge commits from this
144 144 # optimization, since the ctx.files() for a merge commit is not correct for
145 145 # this comparison.
146 146 forwardmissingmatch = match
147 147 if b.p1() == a and b.p2().node() == node.nullid:
148 148 filesmatcher = matchmod.exact(b.files())
149 149 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
150 150 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
151 151
152 152 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
153 153
154 154 if debug:
155 155 dbg(b'debug.copies: missing files to search: %d\n' % len(missing))
156 156
157 157 for f in sorted(missing):
158 158 if debug:
159 159 dbg(b'debug.copies: tracing file: %s\n' % f)
160 160 fctx = b[f]
161 161 fctx._ancestrycontext = ancestrycontext
162 162
163 163 if debug:
164 164 start = util.timer()
165 165 opath = _tracefile(fctx, am, basemf)
166 166 if opath:
167 167 if debug:
168 168 dbg(b'debug.copies: rename of: %s\n' % opath)
169 169 cm[f] = opath
170 170 if debug:
171 171 dbg(
172 172 b'debug.copies: time: %f seconds\n'
173 173 % (util.timer() - start)
174 174 )
175 175 return cm
176 176
177 177
178 178 def _revinfo_getter(repo):
179 179 """returns a function that returns the following data given a <rev>"
180 180
181 181 * p1: revision number of first parent
182 182 * p2: revision number of first parent
183 183 * changes: a ChangingFiles object
184 184 """
185 185 cl = repo.changelog
186 186 parents = cl.parentrevs
187 187 flags = cl.flags
188 188
189 189 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
190 190
191 191 changelogrevision = cl.changelogrevision
192 192
193 193 # A small cache to avoid doing the work twice for merges
194 194 #
195 195 # In the vast majority of cases, if we ask information for a revision
196 196 # about 1 parent, we'll later ask it for the other. So it make sense to
197 197 # keep the information around when reaching the first parent of a merge
198 198 # and dropping it after it was provided for the second parents.
199 199 #
200 200 # It exists cases were only one parent of the merge will be walked. It
201 201 # happens when the "destination" the copy tracing is descendant from a
202 202 # new root, not common with the "source". In that case, we will only walk
203 203 # through merge parents that are descendant of changesets common
204 204 # between "source" and "destination".
205 205 #
206 206 # With the current case implementation if such changesets have a copy
207 207 # information, we'll keep them in memory until the end of
208 208 # _changesetforwardcopies. We don't expect the case to be frequent
209 209 # enough to matters.
210 210 #
211 211 # In addition, it would be possible to reach pathological case, were
212 212 # many first parent are met before any second parent is reached. In
213 213 # that case the cache could grow. If this even become an issue one can
214 214 # safely introduce a maximum cache size. This would trade extra CPU/IO
215 215 # time to save memory.
216 216 merge_caches = {}
217 217
218 218 def revinfo(rev):
219 219 p1, p2 = parents(rev)
220 220 value = None
221 221 e = merge_caches.pop(rev, None)
222 222 if e is not None:
223 223 return e
224 224 changes = None
225 225 if flags(rev) & HASCOPIESINFO:
226 226 changes = changelogrevision(rev).changes
227 227 value = (p1, p2, changes)
228 228 if p1 != node.nullrev and p2 != node.nullrev:
229 229 # XXX some case we over cache, IGNORE
230 230 merge_caches[rev] = value
231 231 return value
232 232
233 233 return revinfo
234 234
235 235
236 236 def cached_is_ancestor(is_ancestor):
237 237 """return a cached version of is_ancestor"""
238 238 cache = {}
239 239
240 240 def _is_ancestor(anc, desc):
241 241 if anc > desc:
242 242 return False
243 243 elif anc == desc:
244 244 return True
245 245 key = (anc, desc)
246 246 ret = cache.get(key)
247 247 if ret is None:
248 248 ret = cache[key] = is_ancestor(anc, desc)
249 249 return ret
250 250
251 251 return _is_ancestor
252 252
253 253
254 254 def _changesetforwardcopies(a, b, match):
255 255 if a.rev() in (node.nullrev, b.rev()):
256 256 return {}
257 257
258 258 repo = a.repo().unfiltered()
259 259 children = {}
260 260
261 261 cl = repo.changelog
262 isancestor = cached_is_ancestor(cl.isancestorrev)
262 isancestor = cl.isancestorrev
263 263 missingrevs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
264 264 mrset = set(missingrevs)
265 265 roots = set()
266 266 for r in missingrevs:
267 267 for p in cl.parentrevs(r):
268 268 if p == node.nullrev:
269 269 continue
270 270 if p not in children:
271 271 children[p] = [r]
272 272 else:
273 273 children[p].append(r)
274 274 if p not in mrset:
275 275 roots.add(p)
276 276 if not roots:
277 277 # no common revision to track copies from
278 278 return {}
279 279 min_root = min(roots)
280 280
281 281 from_head = set(
282 282 cl.reachableroots(min_root, [b.rev()], list(roots), includepath=True)
283 283 )
284 284
285 285 iterrevs = set(from_head)
286 286 iterrevs &= mrset
287 287 iterrevs.update(roots)
288 288 iterrevs.remove(b.rev())
289 289 revs = sorted(iterrevs)
290 290
291 291 if repo.filecopiesmode == b'changeset-sidedata':
292 292 revinfo = _revinfo_getter(repo)
293 293 return _combine_changeset_copies(
294 294 revs, children, b.rev(), revinfo, match, isancestor
295 295 )
296 296 else:
297 297 revinfo = _revinfo_getter_extra(repo)
298 298 return _combine_changeset_copies_extra(
299 299 revs, children, b.rev(), revinfo, match, isancestor
300 300 )
301 301
302 302
303 303 def _combine_changeset_copies(
304 304 revs, children, targetrev, revinfo, match, isancestor
305 305 ):
306 306 """combine the copies information for each item of iterrevs
307 307
308 308 revs: sorted iterable of revision to visit
309 309 children: a {parent: [children]} mapping.
310 310 targetrev: the final copies destination revision (not in iterrevs)
311 311 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
312 312 match: a matcher
313 313
314 314 It returns the aggregated copies information for `targetrev`.
315 315 """
316 316
317 317 alwaysmatch = match.always()
318 318
319 319 if rustmod is not None and alwaysmatch:
320 320 return rustmod.combine_changeset_copies(
321 321 list(revs), children, targetrev, revinfo, isancestor
322 322 )
323 323
324 isancestor = cached_is_ancestor(isancestor)
325
324 326 all_copies = {}
325 327 for r in revs:
326 328 copies = all_copies.pop(r, None)
327 329 if copies is None:
328 330 # this is a root
329 331 copies = {}
330 332 for i, c in enumerate(children[r]):
331 333 p1, p2, changes = revinfo(c)
332 334 childcopies = {}
333 335 if r == p1:
334 336 parent = 1
335 337 if changes is not None:
336 338 childcopies = changes.copied_from_p1
337 339 else:
338 340 assert r == p2
339 341 parent = 2
340 342 if changes is not None:
341 343 childcopies = changes.copied_from_p2
342 344 if not alwaysmatch:
343 345 childcopies = {
344 346 dst: src for dst, src in childcopies.items() if match(dst)
345 347 }
346 348 newcopies = copies
347 349 if childcopies:
348 350 newcopies = copies.copy()
349 351 for dest, source in pycompat.iteritems(childcopies):
350 352 prev = copies.get(source)
351 353 if prev is not None and prev[1] is not None:
352 354 source = prev[1]
353 355 newcopies[dest] = (c, source)
354 356 assert newcopies is not copies
355 357 if changes is not None:
356 358 for f in changes.removed:
357 359 if f in newcopies:
358 360 if newcopies is copies:
359 361 # copy on write to avoid affecting potential other
360 362 # branches. when there are no other branches, this
361 363 # could be avoided.
362 364 newcopies = copies.copy()
363 365 newcopies[f] = (c, None)
364 366 othercopies = all_copies.get(c)
365 367 if othercopies is None:
366 368 all_copies[c] = newcopies
367 369 else:
368 370 # we are the second parent to work on c, we need to merge our
369 371 # work with the other.
370 372 #
371 373 # In case of conflict, parent 1 take precedence over parent 2.
372 374 # This is an arbitrary choice made anew when implementing
373 375 # changeset based copies. It was made without regards with
374 376 # potential filelog related behavior.
375 377 if parent == 1:
376 378 _merge_copies_dict(
377 379 othercopies, newcopies, isancestor, changes
378 380 )
379 381 else:
380 382 _merge_copies_dict(
381 383 newcopies, othercopies, isancestor, changes
382 384 )
383 385 all_copies[c] = newcopies
384 386
385 387 final_copies = {}
386 388 for dest, (tt, source) in all_copies[targetrev].items():
387 389 if source is not None:
388 390 final_copies[dest] = source
389 391 return final_copies
390 392
391 393
392 394 def _merge_copies_dict(minor, major, isancestor, changes):
393 395 """merge two copies-mapping together, minor and major
394 396
395 397 In case of conflict, value from "major" will be picked.
396 398
397 399 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
398 400 ancestors of `high_rev`,
399 401
400 402 - `ismerged(path)`: callable return True if `path` have been merged in the
401 403 current revision,
402 404 """
403 405 for dest, value in major.items():
404 406 other = minor.get(dest)
405 407 if other is None:
406 408 minor[dest] = value
407 409 else:
408 410 new_tt = value[0]
409 411 other_tt = other[0]
410 412 if value[1] == other[1]:
411 413 continue
412 414 # content from "major" wins, unless it is older
413 415 # than the branch point or there is a merge
414 416 if new_tt == other_tt:
415 417 minor[dest] = value
416 418 elif (
417 419 changes is not None
418 420 and value[1] is None
419 421 and dest in changes.salvaged
420 422 ):
421 423 pass
422 424 elif (
423 425 changes is not None
424 426 and other[1] is None
425 427 and dest in changes.salvaged
426 428 ):
427 429 minor[dest] = value
428 430 elif changes is not None and dest in changes.merged:
429 431 minor[dest] = value
430 432 elif not isancestor(new_tt, other_tt):
431 433 if value[1] is not None:
432 434 minor[dest] = value
433 435 elif isancestor(other_tt, new_tt):
434 436 minor[dest] = value
435 437
436 438
437 439 def _revinfo_getter_extra(repo):
438 440 """return a function that return multiple data given a <rev>"i
439 441
440 442 * p1: revision number of first parent
441 443 * p2: revision number of first parent
442 444 * p1copies: mapping of copies from p1
443 445 * p2copies: mapping of copies from p2
444 446 * removed: a list of removed files
445 447 * ismerged: a callback to know if file was merged in that revision
446 448 """
447 449 cl = repo.changelog
448 450 parents = cl.parentrevs
449 451
450 452 def get_ismerged(rev):
451 453 ctx = repo[rev]
452 454
453 455 def ismerged(path):
454 456 if path not in ctx.files():
455 457 return False
456 458 fctx = ctx[path]
457 459 parents = fctx._filelog.parents(fctx._filenode)
458 460 nb_parents = 0
459 461 for n in parents:
460 462 if n != node.nullid:
461 463 nb_parents += 1
462 464 return nb_parents >= 2
463 465
464 466 return ismerged
465 467
466 468 def revinfo(rev):
467 469 p1, p2 = parents(rev)
468 470 ctx = repo[rev]
469 471 p1copies, p2copies = ctx._copies
470 472 removed = ctx.filesremoved()
471 473 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
472 474
473 475 return revinfo
474 476
475 477
476 478 def _combine_changeset_copies_extra(
477 479 revs, children, targetrev, revinfo, match, isancestor
478 480 ):
479 481 """version of `_combine_changeset_copies` that works with the Google
480 482 specific "extra" based storage for copy information"""
481 483 all_copies = {}
482 484 alwaysmatch = match.always()
483 485 for r in revs:
484 486 copies = all_copies.pop(r, None)
485 487 if copies is None:
486 488 # this is a root
487 489 copies = {}
488 490 for i, c in enumerate(children[r]):
489 491 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
490 492 if r == p1:
491 493 parent = 1
492 494 childcopies = p1copies
493 495 else:
494 496 assert r == p2
495 497 parent = 2
496 498 childcopies = p2copies
497 499 if not alwaysmatch:
498 500 childcopies = {
499 501 dst: src for dst, src in childcopies.items() if match(dst)
500 502 }
501 503 newcopies = copies
502 504 if childcopies:
503 505 newcopies = copies.copy()
504 506 for dest, source in pycompat.iteritems(childcopies):
505 507 prev = copies.get(source)
506 508 if prev is not None and prev[1] is not None:
507 509 source = prev[1]
508 510 newcopies[dest] = (c, source)
509 511 assert newcopies is not copies
510 512 for f in removed:
511 513 if f in newcopies:
512 514 if newcopies is copies:
513 515 # copy on write to avoid affecting potential other
514 516 # branches. when there are no other branches, this
515 517 # could be avoided.
516 518 newcopies = copies.copy()
517 519 newcopies[f] = (c, None)
518 520 othercopies = all_copies.get(c)
519 521 if othercopies is None:
520 522 all_copies[c] = newcopies
521 523 else:
522 524 # we are the second parent to work on c, we need to merge our
523 525 # work with the other.
524 526 #
525 527 # In case of conflict, parent 1 take precedence over parent 2.
526 528 # This is an arbitrary choice made anew when implementing
527 529 # changeset based copies. It was made without regards with
528 530 # potential filelog related behavior.
529 531 if parent == 1:
530 532 _merge_copies_dict_extra(
531 533 othercopies, newcopies, isancestor, ismerged
532 534 )
533 535 else:
534 536 _merge_copies_dict_extra(
535 537 newcopies, othercopies, isancestor, ismerged
536 538 )
537 539 all_copies[c] = newcopies
538 540
539 541 final_copies = {}
540 542 for dest, (tt, source) in all_copies[targetrev].items():
541 543 if source is not None:
542 544 final_copies[dest] = source
543 545 return final_copies
544 546
545 547
546 548 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
547 549 """version of `_merge_copies_dict` that works with the Google
548 550 specific "extra" based storage for copy information"""
549 551 for dest, value in major.items():
550 552 other = minor.get(dest)
551 553 if other is None:
552 554 minor[dest] = value
553 555 else:
554 556 new_tt = value[0]
555 557 other_tt = other[0]
556 558 if value[1] == other[1]:
557 559 continue
558 560 # content from "major" wins, unless it is older
559 561 # than the branch point or there is a merge
560 562 if (
561 563 new_tt == other_tt
562 564 or not isancestor(new_tt, other_tt)
563 565 or ismerged(dest)
564 566 ):
565 567 minor[dest] = value
566 568
567 569
568 570 def _forwardcopies(a, b, base=None, match=None):
569 571 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
570 572
571 573 if base is None:
572 574 base = a
573 575 match = a.repo().narrowmatch(match)
574 576 # check for working copy
575 577 if b.rev() is None:
576 578 cm = _committedforwardcopies(a, b.p1(), base, match)
577 579 # combine copies from dirstate if necessary
578 580 copies = _chain(cm, _dirstatecopies(b._repo, match))
579 581 else:
580 582 copies = _committedforwardcopies(a, b, base, match)
581 583 return copies
582 584
583 585
584 586 def _backwardrenames(a, b, match):
585 587 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
586 588 return {}
587 589
588 590 # Even though we're not taking copies into account, 1:n rename situations
589 591 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
590 592 # arbitrarily pick one of the renames.
591 593 # We don't want to pass in "match" here, since that would filter
592 594 # the destination by it. Since we're reversing the copies, we want
593 595 # to filter the source instead.
594 596 f = _forwardcopies(b, a)
595 597 r = {}
596 598 for k, v in sorted(pycompat.iteritems(f)):
597 599 if match and not match(v):
598 600 continue
599 601 # remove copies
600 602 if v in a:
601 603 continue
602 604 r[v] = k
603 605 return r
604 606
605 607
606 608 def pathcopies(x, y, match=None):
607 609 """find {dst@y: src@x} copy mapping for directed compare"""
608 610 repo = x._repo
609 611 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
610 612 if debug:
611 613 repo.ui.debug(
612 614 b'debug.copies: searching copies from %s to %s\n' % (x, y)
613 615 )
614 616 if x == y or not x or not y:
615 617 return {}
616 618 if y.rev() is None and x == y.p1():
617 619 if debug:
618 620 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
619 621 # short-circuit to avoid issues with merge states
620 622 return _dirstatecopies(repo, match)
621 623 a = y.ancestor(x)
622 624 if a == x:
623 625 if debug:
624 626 repo.ui.debug(b'debug.copies: search mode: forward\n')
625 627 copies = _forwardcopies(x, y, match=match)
626 628 elif a == y:
627 629 if debug:
628 630 repo.ui.debug(b'debug.copies: search mode: backward\n')
629 631 copies = _backwardrenames(x, y, match=match)
630 632 else:
631 633 if debug:
632 634 repo.ui.debug(b'debug.copies: search mode: combined\n')
633 635 base = None
634 636 if a.rev() != node.nullrev:
635 637 base = x
636 638 copies = _chain(
637 639 _backwardrenames(x, a, match=match),
638 640 _forwardcopies(a, y, base, match=match),
639 641 )
640 642 _filter(x, y, copies)
641 643 return copies
642 644
643 645
644 646 def mergecopies(repo, c1, c2, base):
645 647 """
646 648 Finds moves and copies between context c1 and c2 that are relevant for
647 649 merging. 'base' will be used as the merge base.
648 650
649 651 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
650 652 files that were moved/ copied in one merge parent and modified in another.
651 653 For example:
652 654
653 655 o ---> 4 another commit
654 656 |
655 657 | o ---> 3 commit that modifies a.txt
656 658 | /
657 659 o / ---> 2 commit that moves a.txt to b.txt
658 660 |/
659 661 o ---> 1 merge base
660 662
661 663 If we try to rebase revision 3 on revision 4, since there is no a.txt in
662 664 revision 4, and if user have copytrace disabled, we prints the following
663 665 message:
664 666
665 667 ```other changed <file> which local deleted```
666 668
667 669 Returns a tuple where:
668 670
669 671 "branch_copies" an instance of branch_copies.
670 672
671 673 "diverge" is a mapping of source name -> list of destination names
672 674 for divergent renames.
673 675
674 676 This function calls different copytracing algorithms based on config.
675 677 """
676 678 # avoid silly behavior for update from empty dir
677 679 if not c1 or not c2 or c1 == c2:
678 680 return branch_copies(), branch_copies(), {}
679 681
680 682 narrowmatch = c1.repo().narrowmatch()
681 683
682 684 # avoid silly behavior for parent -> working dir
683 685 if c2.node() is None and c1.node() == repo.dirstate.p1():
684 686 return (
685 687 branch_copies(_dirstatecopies(repo, narrowmatch)),
686 688 branch_copies(),
687 689 {},
688 690 )
689 691
690 692 copytracing = repo.ui.config(b'experimental', b'copytrace')
691 693 if stringutil.parsebool(copytracing) is False:
692 694 # stringutil.parsebool() returns None when it is unable to parse the
693 695 # value, so we should rely on making sure copytracing is on such cases
694 696 return branch_copies(), branch_copies(), {}
695 697
696 698 if usechangesetcentricalgo(repo):
697 699 # The heuristics don't make sense when we need changeset-centric algos
698 700 return _fullcopytracing(repo, c1, c2, base)
699 701
700 702 # Copy trace disabling is explicitly below the node == p1 logic above
701 703 # because the logic above is required for a simple copy to be kept across a
702 704 # rebase.
703 705 if copytracing == b'heuristics':
704 706 # Do full copytracing if only non-public revisions are involved as
705 707 # that will be fast enough and will also cover the copies which could
706 708 # be missed by heuristics
707 709 if _isfullcopytraceable(repo, c1, base):
708 710 return _fullcopytracing(repo, c1, c2, base)
709 711 return _heuristicscopytracing(repo, c1, c2, base)
710 712 else:
711 713 return _fullcopytracing(repo, c1, c2, base)
712 714
713 715
714 716 def _isfullcopytraceable(repo, c1, base):
715 717 """Checks that if base, source and destination are all no-public branches,
716 718 if yes let's use the full copytrace algorithm for increased capabilities
717 719 since it will be fast enough.
718 720
719 721 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
720 722 number of changesets from c1 to base such that if number of changesets are
721 723 more than the limit, full copytracing algorithm won't be used.
722 724 """
723 725 if c1.rev() is None:
724 726 c1 = c1.p1()
725 727 if c1.mutable() and base.mutable():
726 728 sourcecommitlimit = repo.ui.configint(
727 729 b'experimental', b'copytrace.sourcecommitlimit'
728 730 )
729 731 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
730 732 return commits < sourcecommitlimit
731 733 return False
732 734
733 735
734 736 def _checksinglesidecopies(
735 737 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
736 738 ):
737 739 if src not in m2:
738 740 # deleted on side 2
739 741 if src not in m1:
740 742 # renamed on side 1, deleted on side 2
741 743 renamedelete[src] = dsts1
742 744 elif src not in mb:
743 745 # Work around the "short-circuit to avoid issues with merge states"
744 746 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
745 747 # destination doesn't exist in y.
746 748 pass
747 749 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
748 750 return
749 751 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
750 752 # modified on side 2
751 753 for dst in dsts1:
752 754 copy[dst] = src
753 755
754 756
755 757 class branch_copies(object):
756 758 """Information about copies made on one side of a merge/graft.
757 759
758 760 "copy" is a mapping from destination name -> source name,
759 761 where source is in c1 and destination is in c2 or vice-versa.
760 762
761 763 "movewithdir" is a mapping from source name -> destination name,
762 764 where the file at source present in one context but not the other
763 765 needs to be moved to destination by the merge process, because the
764 766 other context moved the directory it is in.
765 767
766 768 "renamedelete" is a mapping of source name -> list of destination
767 769 names for files deleted in c1 that were renamed in c2 or vice-versa.
768 770
769 771 "dirmove" is a mapping of detected source dir -> destination dir renames.
770 772 This is needed for handling changes to new files previously grafted into
771 773 renamed directories.
772 774 """
773 775
774 776 def __init__(
775 777 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
776 778 ):
777 779 self.copy = {} if copy is None else copy
778 780 self.renamedelete = {} if renamedelete is None else renamedelete
779 781 self.dirmove = {} if dirmove is None else dirmove
780 782 self.movewithdir = {} if movewithdir is None else movewithdir
781 783
782 784 def __repr__(self):
783 785 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
784 786 self.copy,
785 787 self.renamedelete,
786 788 self.dirmove,
787 789 self.movewithdir,
788 790 )
789 791
790 792
791 793 def _fullcopytracing(repo, c1, c2, base):
792 794 """The full copytracing algorithm which finds all the new files that were
793 795 added from merge base up to the top commit and for each file it checks if
794 796 this file was copied from another file.
795 797
796 798 This is pretty slow when a lot of changesets are involved but will track all
797 799 the copies.
798 800 """
799 801 m1 = c1.manifest()
800 802 m2 = c2.manifest()
801 803 mb = base.manifest()
802 804
803 805 copies1 = pathcopies(base, c1)
804 806 copies2 = pathcopies(base, c2)
805 807
806 808 if not (copies1 or copies2):
807 809 return branch_copies(), branch_copies(), {}
808 810
809 811 inversecopies1 = {}
810 812 inversecopies2 = {}
811 813 for dst, src in copies1.items():
812 814 inversecopies1.setdefault(src, []).append(dst)
813 815 for dst, src in copies2.items():
814 816 inversecopies2.setdefault(src, []).append(dst)
815 817
816 818 copy1 = {}
817 819 copy2 = {}
818 820 diverge = {}
819 821 renamedelete1 = {}
820 822 renamedelete2 = {}
821 823 allsources = set(inversecopies1) | set(inversecopies2)
822 824 for src in allsources:
823 825 dsts1 = inversecopies1.get(src)
824 826 dsts2 = inversecopies2.get(src)
825 827 if dsts1 and dsts2:
826 828 # copied/renamed on both sides
827 829 if src not in m1 and src not in m2:
828 830 # renamed on both sides
829 831 dsts1 = set(dsts1)
830 832 dsts2 = set(dsts2)
831 833 # If there's some overlap in the rename destinations, we
832 834 # consider it not divergent. For example, if side 1 copies 'a'
833 835 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
834 836 # and 'd' and deletes 'a'.
835 837 if dsts1 & dsts2:
836 838 for dst in dsts1 & dsts2:
837 839 copy1[dst] = src
838 840 copy2[dst] = src
839 841 else:
840 842 diverge[src] = sorted(dsts1 | dsts2)
841 843 elif src in m1 and src in m2:
842 844 # copied on both sides
843 845 dsts1 = set(dsts1)
844 846 dsts2 = set(dsts2)
845 847 for dst in dsts1 & dsts2:
846 848 copy1[dst] = src
847 849 copy2[dst] = src
848 850 # TODO: Handle cases where it was renamed on one side and copied
849 851 # on the other side
850 852 elif dsts1:
851 853 # copied/renamed only on side 1
852 854 _checksinglesidecopies(
853 855 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
854 856 )
855 857 elif dsts2:
856 858 # copied/renamed only on side 2
857 859 _checksinglesidecopies(
858 860 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
859 861 )
860 862
861 863 # find interesting file sets from manifests
862 864 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
863 865 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
864 866 u1 = sorted(addedinm1 - addedinm2)
865 867 u2 = sorted(addedinm2 - addedinm1)
866 868
867 869 header = b" unmatched files in %s"
868 870 if u1:
869 871 repo.ui.debug(b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1)))
870 872 if u2:
871 873 repo.ui.debug(b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2)))
872 874
873 875 if repo.ui.debugflag:
874 876 renamedeleteset = set()
875 877 divergeset = set()
876 878 for dsts in diverge.values():
877 879 divergeset.update(dsts)
878 880 for dsts in renamedelete1.values():
879 881 renamedeleteset.update(dsts)
880 882 for dsts in renamedelete2.values():
881 883 renamedeleteset.update(dsts)
882 884
883 885 repo.ui.debug(
884 886 b" all copies found (* = to merge, ! = divergent, "
885 887 b"% = renamed and deleted):\n"
886 888 )
887 889 for side, copies in ((b"local", copies1), (b"remote", copies2)):
888 890 if not copies:
889 891 continue
890 892 repo.ui.debug(b" on %s side:\n" % side)
891 893 for f in sorted(copies):
892 894 note = b""
893 895 if f in copy1 or f in copy2:
894 896 note += b"*"
895 897 if f in divergeset:
896 898 note += b"!"
897 899 if f in renamedeleteset:
898 900 note += b"%"
899 901 repo.ui.debug(
900 902 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
901 903 )
902 904 del renamedeleteset
903 905 del divergeset
904 906
905 907 repo.ui.debug(b" checking for directory renames\n")
906 908
907 909 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2)
908 910 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1)
909 911
910 912 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
911 913 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
912 914
913 915 return branch_copies1, branch_copies2, diverge
914 916
915 917
916 918 def _dir_renames(repo, ctx, copy, fullcopy, addedfiles):
917 919 """Finds moved directories and files that should move with them.
918 920
919 921 ctx: the context for one of the sides
920 922 copy: files copied on the same side (as ctx)
921 923 fullcopy: files copied on the same side (as ctx), including those that
922 924 merge.manifestmerge() won't care about
923 925 addedfiles: added files on the other side (compared to ctx)
924 926 """
925 927 # generate a directory move map
926 928 d = ctx.dirs()
927 929 invalid = set()
928 930 dirmove = {}
929 931
930 932 # examine each file copy for a potential directory move, which is
931 933 # when all the files in a directory are moved to a new directory
932 934 for dst, src in pycompat.iteritems(fullcopy):
933 935 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
934 936 if dsrc in invalid:
935 937 # already seen to be uninteresting
936 938 continue
937 939 elif dsrc in d and ddst in d:
938 940 # directory wasn't entirely moved locally
939 941 invalid.add(dsrc)
940 942 elif dsrc in dirmove and dirmove[dsrc] != ddst:
941 943 # files from the same directory moved to two different places
942 944 invalid.add(dsrc)
943 945 else:
944 946 # looks good so far
945 947 dirmove[dsrc] = ddst
946 948
947 949 for i in invalid:
948 950 if i in dirmove:
949 951 del dirmove[i]
950 952 del d, invalid
951 953
952 954 if not dirmove:
953 955 return {}, {}
954 956
955 957 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
956 958
957 959 for d in dirmove:
958 960 repo.ui.debug(
959 961 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
960 962 )
961 963
962 964 movewithdir = {}
963 965 # check unaccounted nonoverlapping files against directory moves
964 966 for f in addedfiles:
965 967 if f not in fullcopy:
966 968 for d in dirmove:
967 969 if f.startswith(d):
968 970 # new file added in a directory that was moved, move it
969 971 df = dirmove[d] + f[len(d) :]
970 972 if df not in copy:
971 973 movewithdir[f] = df
972 974 repo.ui.debug(
973 975 b" pending file src: '%s' -> dst: '%s'\n"
974 976 % (f, df)
975 977 )
976 978 break
977 979
978 980 return dirmove, movewithdir
979 981
980 982
981 983 def _heuristicscopytracing(repo, c1, c2, base):
982 984 """Fast copytracing using filename heuristics
983 985
984 986 Assumes that moves or renames are of following two types:
985 987
986 988 1) Inside a directory only (same directory name but different filenames)
987 989 2) Move from one directory to another
988 990 (same filenames but different directory names)
989 991
990 992 Works only when there are no merge commits in the "source branch".
991 993 Source branch is commits from base up to c2 not including base.
992 994
993 995 If merge is involved it fallbacks to _fullcopytracing().
994 996
995 997 Can be used by setting the following config:
996 998
997 999 [experimental]
998 1000 copytrace = heuristics
999 1001
1000 1002 In some cases the copy/move candidates found by heuristics can be very large
1001 1003 in number and that will make the algorithm slow. The number of possible
1002 1004 candidates to check can be limited by using the config
1003 1005 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1004 1006 """
1005 1007
1006 1008 if c1.rev() is None:
1007 1009 c1 = c1.p1()
1008 1010 if c2.rev() is None:
1009 1011 c2 = c2.p1()
1010 1012
1011 1013 changedfiles = set()
1012 1014 m1 = c1.manifest()
1013 1015 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1014 1016 # If base is not in c2 branch, we switch to fullcopytracing
1015 1017 repo.ui.debug(
1016 1018 b"switching to full copytracing as base is not "
1017 1019 b"an ancestor of c2\n"
1018 1020 )
1019 1021 return _fullcopytracing(repo, c1, c2, base)
1020 1022
1021 1023 ctx = c2
1022 1024 while ctx != base:
1023 1025 if len(ctx.parents()) == 2:
1024 1026 # To keep things simple let's not handle merges
1025 1027 repo.ui.debug(b"switching to full copytracing because of merges\n")
1026 1028 return _fullcopytracing(repo, c1, c2, base)
1027 1029 changedfiles.update(ctx.files())
1028 1030 ctx = ctx.p1()
1029 1031
1030 1032 copies2 = {}
1031 1033 cp = _forwardcopies(base, c2)
1032 1034 for dst, src in pycompat.iteritems(cp):
1033 1035 if src in m1:
1034 1036 copies2[dst] = src
1035 1037
1036 1038 # file is missing if it isn't present in the destination, but is present in
1037 1039 # the base and present in the source.
1038 1040 # Presence in the base is important to exclude added files, presence in the
1039 1041 # source is important to exclude removed files.
1040 1042 filt = lambda f: f not in m1 and f in base and f in c2
1041 1043 missingfiles = [f for f in changedfiles if filt(f)]
1042 1044
1043 1045 copies1 = {}
1044 1046 if missingfiles:
1045 1047 basenametofilename = collections.defaultdict(list)
1046 1048 dirnametofilename = collections.defaultdict(list)
1047 1049
1048 1050 for f in m1.filesnotin(base.manifest()):
1049 1051 basename = os.path.basename(f)
1050 1052 dirname = os.path.dirname(f)
1051 1053 basenametofilename[basename].append(f)
1052 1054 dirnametofilename[dirname].append(f)
1053 1055
1054 1056 for f in missingfiles:
1055 1057 basename = os.path.basename(f)
1056 1058 dirname = os.path.dirname(f)
1057 1059 samebasename = basenametofilename[basename]
1058 1060 samedirname = dirnametofilename[dirname]
1059 1061 movecandidates = samebasename + samedirname
1060 1062 # f is guaranteed to be present in c2, that's why
1061 1063 # c2.filectx(f) won't fail
1062 1064 f2 = c2.filectx(f)
1063 1065 # we can have a lot of candidates which can slow down the heuristics
1064 1066 # config value to limit the number of candidates moves to check
1065 1067 maxcandidates = repo.ui.configint(
1066 1068 b'experimental', b'copytrace.movecandidateslimit'
1067 1069 )
1068 1070
1069 1071 if len(movecandidates) > maxcandidates:
1070 1072 repo.ui.status(
1071 1073 _(
1072 1074 b"skipping copytracing for '%s', more "
1073 1075 b"candidates than the limit: %d\n"
1074 1076 )
1075 1077 % (f, len(movecandidates))
1076 1078 )
1077 1079 continue
1078 1080
1079 1081 for candidate in movecandidates:
1080 1082 f1 = c1.filectx(candidate)
1081 1083 if _related(f1, f2):
1082 1084 # if there are a few related copies then we'll merge
1083 1085 # changes into all of them. This matches the behaviour
1084 1086 # of upstream copytracing
1085 1087 copies1[candidate] = f
1086 1088
1087 1089 return branch_copies(copies1), branch_copies(copies2), {}
1088 1090
1089 1091
1090 1092 def _related(f1, f2):
1091 1093 """return True if f1 and f2 filectx have a common ancestor
1092 1094
1093 1095 Walk back to common ancestor to see if the two files originate
1094 1096 from the same file. Since workingfilectx's rev() is None it messes
1095 1097 up the integer comparison logic, hence the pre-step check for
1096 1098 None (f1 and f2 can only be workingfilectx's initially).
1097 1099 """
1098 1100
1099 1101 if f1 == f2:
1100 1102 return True # a match
1101 1103
1102 1104 g1, g2 = f1.ancestors(), f2.ancestors()
1103 1105 try:
1104 1106 f1r, f2r = f1.linkrev(), f2.linkrev()
1105 1107
1106 1108 if f1r is None:
1107 1109 f1 = next(g1)
1108 1110 if f2r is None:
1109 1111 f2 = next(g2)
1110 1112
1111 1113 while True:
1112 1114 f1r, f2r = f1.linkrev(), f2.linkrev()
1113 1115 if f1r > f2r:
1114 1116 f1 = next(g1)
1115 1117 elif f2r > f1r:
1116 1118 f2 = next(g2)
1117 1119 else: # f1 and f2 point to files in the same linkrev
1118 1120 return f1 == f2 # true if they point to the same file
1119 1121 except StopIteration:
1120 1122 return False
1121 1123
1122 1124
1123 1125 def graftcopies(wctx, ctx, base):
1124 1126 """reproduce copies between base and ctx in the wctx
1125 1127
1126 1128 Unlike mergecopies(), this function will only consider copies between base
1127 1129 and ctx; it will ignore copies between base and wctx. Also unlike
1128 1130 mergecopies(), this function will apply copies to the working copy (instead
1129 1131 of just returning information about the copies). That makes it cheaper
1130 1132 (especially in the common case of base==ctx.p1()) and useful also when
1131 1133 experimental.copytrace=off.
1132 1134
1133 1135 merge.update() will have already marked most copies, but it will only
1134 1136 mark copies if it thinks the source files are related (see
1135 1137 merge._related()). It will also not mark copies if the file wasn't modified
1136 1138 on the local side. This function adds the copies that were "missed"
1137 1139 by merge.update().
1138 1140 """
1139 1141 new_copies = pathcopies(base, ctx)
1140 1142 _filter(wctx.p1(), wctx, new_copies)
1141 1143 for dst, src in pycompat.iteritems(new_copies):
1142 1144 wctx[dst].markcopied(src)
@@ -1,316 +1,354 b''
1 1 use crate::utils::hg_path::HgPathBuf;
2 2 use crate::Revision;
3 3
4 4 use im_rc::ordmap::DiffItem;
5 5 use im_rc::ordmap::OrdMap;
6 6
7 7 use std::collections::HashMap;
8 8 use std::collections::HashSet;
9 9
10 10 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
11 11
12 12 #[derive(Clone, Debug, PartialEq)]
13 13 struct TimeStampedPathCopy {
14 14 /// revision at which the copy information was added
15 15 rev: Revision,
16 16 /// the copy source, (Set to None in case of deletion of the associated
17 17 /// key)
18 18 path: Option<HgPathBuf>,
19 19 }
20 20
21 21 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
22 22 type TimeStampedPathCopies = OrdMap<HgPathBuf, TimeStampedPathCopy>;
23 23
24 24 /// hold parent 1, parent 2 and relevant files actions.
25 25 pub type RevInfo = (Revision, Revision, ChangedFiles);
26 26
27 27 /// represent the files affected by a changesets
28 28 ///
29 29 /// This hold a subset of mercurial.metadata.ChangingFiles as we do not need
30 30 /// all the data categories tracked by it.
31 31 pub struct ChangedFiles {
32 32 removed: HashSet<HgPathBuf>,
33 33 merged: HashSet<HgPathBuf>,
34 34 salvaged: HashSet<HgPathBuf>,
35 35 copied_from_p1: PathCopies,
36 36 copied_from_p2: PathCopies,
37 37 }
38 38
39 39 impl ChangedFiles {
40 40 pub fn new(
41 41 removed: HashSet<HgPathBuf>,
42 42 merged: HashSet<HgPathBuf>,
43 43 salvaged: HashSet<HgPathBuf>,
44 44 copied_from_p1: PathCopies,
45 45 copied_from_p2: PathCopies,
46 46 ) -> Self {
47 47 ChangedFiles {
48 48 removed,
49 49 merged,
50 50 salvaged,
51 51 copied_from_p1,
52 52 copied_from_p2,
53 53 }
54 54 }
55 55
56 56 pub fn new_empty() -> Self {
57 57 ChangedFiles {
58 58 removed: HashSet::new(),
59 59 merged: HashSet::new(),
60 60 salvaged: HashSet::new(),
61 61 copied_from_p1: PathCopies::new(),
62 62 copied_from_p2: PathCopies::new(),
63 63 }
64 64 }
65 65 }
66 66
67 /// A struct responsible for answering "is X ancestors of Y" quickly
68 ///
69 /// The structure will delegate ancestors call to a callback, and cache the
70 /// result.
71 #[derive(Debug)]
72 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
73 inner: &'a A,
74 pairs: HashMap<(Revision, Revision), bool>,
75 }
76
77 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
78 fn new(func: &'a A) -> Self {
79 Self {
80 inner: func,
81 pairs: HashMap::default(),
82 }
83 }
84
85 /// returns `true` if `anc` is an ancestors of `desc`, `false` otherwise
86 fn is_ancestor(&mut self, anc: Revision, desc: Revision) -> bool {
87 if anc > desc {
88 false
89 } else if anc == desc {
90 true
91 } else {
92 if let Some(b) = self.pairs.get(&(anc, desc)) {
93 *b
94 } else {
95 let b = (self.inner)(anc, desc);
96 self.pairs.insert((anc, desc), b);
97 b
98 }
99 }
100 }
101 }
102
67 103 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
68 104 ///
69 105 /// Arguments are:
70 106 ///
71 107 /// revs: all revisions to be considered
72 108 /// children: a {parent ? [childrens]} mapping
73 109 /// target_rev: the final revision we are combining copies to
74 110 /// rev_info(rev): callback to get revision information:
75 111 /// * first parent
76 112 /// * second parent
77 113 /// * ChangedFiles
78 114 /// isancestors(low_rev, high_rev): callback to check if a revision is an
79 115 /// ancestor of another
80 pub fn combine_changeset_copies(
116 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool>(
81 117 revs: Vec<Revision>,
82 118 children: HashMap<Revision, Vec<Revision>>,
83 119 target_rev: Revision,
84 120 rev_info: &impl Fn(Revision) -> RevInfo,
85 is_ancestor: &impl Fn(Revision, Revision) -> bool,
121 is_ancestor: &A,
86 122 ) -> PathCopies {
87 123 let mut all_copies = HashMap::new();
124 let mut oracle = AncestorOracle::new(is_ancestor);
88 125
89 126 for rev in revs {
90 127 // Retrieve data computed in a previous iteration
91 128 let copies = all_copies.remove(&rev);
92 129 let copies = match copies {
93 130 Some(c) => c,
94 131 None => TimeStampedPathCopies::default(), // root of the walked set
95 132 };
96 133
97 134 let current_children = match children.get(&rev) {
98 135 Some(c) => c,
99 136 None => panic!("inconsistent `revs` and `children`"),
100 137 };
101 138
102 139 for child in current_children {
103 140 // We will chain the copies information accumulated for `rev` with
104 141 // the individual copies information for each of its children.
105 142 // Creating a new PathCopies for each `rev` ? `children` vertex.
106 143 let (p1, p2, changes) = rev_info(*child);
107 144
108 145 let (parent, child_copies) = if rev == p1 {
109 146 (1, &changes.copied_from_p1)
110 147 } else {
111 148 assert_eq!(rev, p2);
112 149 (2, &changes.copied_from_p2)
113 150 };
114 151 let mut new_copies = copies.clone();
115 152
116 153 for (dest, source) in child_copies {
117 154 let entry;
118 155 if let Some(v) = copies.get(source) {
119 156 entry = match &v.path {
120 157 Some(path) => Some((*(path)).to_owned()),
121 158 None => Some(source.to_owned()),
122 159 }
123 160 } else {
124 161 entry = Some(source.to_owned());
125 162 }
126 163 // Each new entry is introduced by the children, we record this
127 164 // information as we will need it to take the right decision
128 165 // when merging conflicting copy information. See
129 166 // merge_copies_dict for details.
130 167 let ttpc = TimeStampedPathCopy {
131 168 rev: *child,
132 169 path: entry,
133 170 };
134 171 new_copies.insert(dest.to_owned(), ttpc);
135 172 }
136 173
137 174 // We must drop copy information for removed file.
138 175 //
139 176 // We need to explicitly record them as dropped to propagate this
140 177 // information when merging two TimeStampedPathCopies object.
141 178 for f in changes.removed.iter() {
142 179 if new_copies.contains_key(f.as_ref()) {
143 180 let ttpc = TimeStampedPathCopy {
144 181 rev: *child,
145 182 path: None,
146 183 };
147 184 new_copies.insert(f.to_owned(), ttpc);
148 185 }
149 186 }
150 187
151 188 // Merge has two parents needs to combines their copy information.
152 189 //
153 190 // If the vertex from the other parent was already processed, we
154 191 // will have a value for the child ready to be used. We need to
155 192 // grab it and combine it with the one we already
156 193 // computed. If not we can simply store the newly
157 194 // computed data. The processing happening at
158 195 // the time of the second parent will take care of combining the
159 196 // two TimeStampedPathCopies instance.
160 197 match all_copies.remove(child) {
161 198 None => {
162 199 all_copies.insert(child, new_copies);
163 200 }
164 201 Some(other_copies) => {
165 202 let (minor, major) = match parent {
166 203 1 => (other_copies, new_copies),
167 204 2 => (new_copies, other_copies),
168 205 _ => unreachable!(),
169 206 };
170 207 let merged_copies =
171 merge_copies_dict(minor, major, &changes, is_ancestor);
208 merge_copies_dict(minor, major, &changes, &mut oracle);
172 209 all_copies.insert(child, merged_copies);
173 210 }
174 211 };
175 212 }
176 213 }
177 214
178 215 // Drop internal information (like the timestamp) and return the final
179 216 // mapping.
180 217 let tt_result = all_copies
181 218 .remove(&target_rev)
182 219 .expect("target revision was not processed");
183 220 let mut result = PathCopies::default();
184 221 for (dest, tt_source) in tt_result {
185 222 if let Some(path) = tt_source.path {
186 223 result.insert(dest, path);
187 224 }
188 225 }
189 226 result
190 227 }
191 228
192 229 /// merge two copies-mapping together, minor and major
193 230 ///
194 231 /// In case of conflict, value from "major" will be picked, unless in some
195 232 /// cases. See inline documentation for details.
196 233 #[allow(clippy::if_same_then_else)]
197 fn merge_copies_dict(
234 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
198 235 minor: TimeStampedPathCopies,
199 236 major: TimeStampedPathCopies,
200 237 changes: &ChangedFiles,
201 is_ancestor: &impl Fn(Revision, Revision) -> bool,
238 oracle: &mut AncestorOracle<A>,
202 239 ) -> TimeStampedPathCopies {
203 240 if minor.is_empty() {
204 241 return major;
205 242 } else if major.is_empty() {
206 243 return minor;
207 244 }
208 245 let mut override_minor = Vec::new();
209 246 let mut override_major = Vec::new();
210 247
211 248 let mut to_major = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
212 249 override_major.push((k.clone(), v.clone()))
213 250 };
214 251 let mut to_minor = |k: &HgPathBuf, v: &TimeStampedPathCopy| {
215 252 override_minor.push((k.clone(), v.clone()))
216 253 };
217 254
218 255 // The diff function leverage detection of the identical subpart if minor
219 256 // and major has some common ancestors. This make it very fast is most
220 257 // case.
221 258 //
222 259 // In case where the two map are vastly different in size, the current
223 260 // approach is still slowish because the iteration will iterate over
224 261 // all the "exclusive" content of the larger on. This situation can be
225 262 // frequent when the subgraph of revision we are processing has a lot
226 263 // of roots. Each roots adding they own fully new map to the mix (and
227 264 // likely a small map, if the path from the root to the "main path" is
228 265 // small.
229 266 //
230 267 // We could do better by detecting such situation and processing them
231 268 // differently.
232 269 for d in minor.diff(&major) {
233 270 match d {
234 271 DiffItem::Add(k, v) => to_minor(k, v),
235 272 DiffItem::Remove(k, v) => to_major(k, v),
236 273 DiffItem::Update { old, new } => {
237 274 let (dest, src_major) = new;
238 275 let (_, src_minor) = old;
239 276 let mut pick_minor = || (to_major(dest, src_minor));
240 277 let mut pick_major = || (to_minor(dest, src_major));
241 278 if src_major.path == src_minor.path {
242 279 // we have the same value, but from other source;
243 280 if src_major.rev == src_minor.rev {
244 281 // If the two entry are identical, no need to do
245 282 // anything (but diff should not have yield them)
246 283 unreachable!();
247 } else if is_ancestor(src_major.rev, src_minor.rev) {
284 } else if oracle.is_ancestor(src_major.rev, src_minor.rev)
285 {
248 286 pick_minor();
249 287 } else {
250 288 pick_major();
251 289 }
252 290 } else if src_major.rev == src_minor.rev {
253 291 // We cannot get copy information for both p1 and p2 in the
254 292 // same rev. So this is the same value.
255 293 unreachable!();
256 294 } else if src_major.path.is_none()
257 295 && changes.salvaged.contains(dest)
258 296 {
259 297 // If the file is "deleted" in the major side but was
260 298 // salvaged by the merge, we keep the minor side alive
261 299 pick_minor();
262 300 } else if src_minor.path.is_none()
263 301 && changes.salvaged.contains(dest)
264 302 {
265 303 // If the file is "deleted" in the minor side but was
266 304 // salvaged by the merge, unconditionnaly preserve the
267 305 // major side.
268 306 pick_major();
269 307 } else if changes.merged.contains(dest) {
270 308 // If the file was actively merged, copy information from
271 309 // each side might conflict. The major side will win such
272 310 // conflict.
273 311 pick_major();
274 } else if is_ancestor(src_major.rev, src_minor.rev) {
312 } else if oracle.is_ancestor(src_major.rev, src_minor.rev) {
275 313 // If the minor side is strictly newer than the major side,
276 314 // it should be kept.
277 315 pick_minor();
278 316 } else if src_major.path.is_some() {
279 317 // without any special case, the "major" value win other
280 318 // the "minor" one.
281 319 pick_major();
282 } else if is_ancestor(src_minor.rev, src_major.rev) {
320 } else if oracle.is_ancestor(src_minor.rev, src_major.rev) {
283 321 // the "major" rev is a direct ancestors of "minor", any
284 322 // different value should overwrite
285 323 pick_major();
286 324 } else {
287 325 // major version is None (so the file was deleted on that
288 326 // branch) and that branch is independant (neither minor
289 327 // nor major is an ancestors of the other one.) We preserve
290 328 // the new information about the new file.
291 329 pick_minor();
292 330 }
293 331 }
294 332 };
295 333 }
296 334
297 335 let updates;
298 336 let mut result;
299 337 if override_major.is_empty() {
300 338 result = major
301 339 } else if override_minor.is_empty() {
302 340 result = minor
303 341 } else {
304 342 if override_minor.len() < override_major.len() {
305 343 updates = override_minor;
306 344 result = minor;
307 345 } else {
308 346 updates = override_major;
309 347 result = major;
310 348 }
311 349 for (k, v) in updates {
312 350 result.insert(k, v);
313 351 }
314 352 }
315 353 result
316 354 }
General Comments 0
You need to be logged in to leave comments. Login now