copies: rearrange all value comparison conditional...
marmoute
r47309:c692384b default
@@ -1,1241 +1,1269
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 nullid,
17 17 nullrev,
18 18 )
19 19
20 20 from . import (
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28
29 29 from .utils import stringutil
30 30
31 31 from .revlogutils import (
32 32 flagutil,
33 33 sidedata as sidedatamod,
34 34 )
35 35
36 36 rustmod = policy.importrust("copy_tracing")
37 37
38 38
39 39 def _filter(src, dst, t):
40 40 """filters out invalid copies after chaining"""
41 41
42 42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 44 # in the following table (not including trivial cases). For example, case 6
45 45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 46 # then was renamed between 'mid' and 'dst'.
47 47 #
48 48 # case src mid dst result
49 49 # 1 x y - -
50 50 # 2 x y y x->y
51 51 # 3 x y x -
52 52 # 4 x y z x->z
53 53 # 5 - x y -
54 54 # 6 x x y x->y
55 55 #
56 56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 58 # between 5 and 6, so it includes all cases in its result.
59 59 # Cases 1, 3, and 5 are then removed by _filter().
60 60
61 61 for k, v in list(t.items()):
62 62 if k == v: # case 3
63 63 del t[k]
64 64 elif v not in src: # case 5
65 65 # remove copies from files that didn't exist
66 66 del t[k]
67 67 elif k not in dst: # case 1
68 68 # remove copies to files that were then removed
69 69 del t[k]
70 70
71 71
72 72 def _chain(prefix, suffix):
73 73 """chain two sets of copies 'prefix' and 'suffix'"""
74 74 result = prefix.copy()
75 75 for key, value in pycompat.iteritems(suffix):
76 76 result[key] = prefix.get(value, value)
77 77 return result
78 78
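To make the case table in `_filter()` concrete, here is a small hypothetical walkthrough of the chaining step, with plain dicts standing in for the copy mappings (not part of this changeset):

```
# Case 4 from the table above: 'x' was renamed to 'y' between src and mid,
# then 'y' was renamed to 'z' between mid and dst.
prefix = {b'y': b'x'}   # copies from src to mid, as a {dst: src} mapping
suffix = {b'z': b'y'}   # copies from mid to dst
result = prefix.copy()
for key, value in suffix.items():
    result[key] = prefix.get(value, value)
# _chain() keeps the intermediate entry; _filter() would then drop
# b'y', whose destination no longer exists in dst (case 1).
assert result == {b'y': b'x', b'z': b'x'}
```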
79 79
80 80 def _tracefile(fctx, am, basemf):
81 81 """return file context that is the ancestor of fctx present in ancestor
82 82 manifest am
83 83
84 84 Note: we used to try and stop after a given limit; however, checking if that
85 85 limit was reached turned out to be very expensive, so we are better off
86 86 disabling that feature."""
87 87
88 88 for f in fctx.ancestors():
89 89 path = f.path()
90 90 if am.get(path, None) == f.filenode():
91 91 return path
92 92 if basemf and basemf.get(path, None) == f.filenode():
93 93 return path
94 94
95 95
96 96 def _dirstatecopies(repo, match=None):
97 97 ds = repo.dirstate
98 98 c = ds.copies().copy()
99 99 for k in list(c):
100 100 if ds[k] not in b'anm' or (match and not match(k)):
101 101 del c[k]
102 102 return c
103 103
104 104
105 105 def _computeforwardmissing(a, b, match=None):
106 106 """Computes which files are in b but not a.
107 107 This is its own function so extensions can easily wrap this call to see what
108 108 files _forwardcopies is about to process.
109 109 """
110 110 ma = a.manifest()
111 111 mb = b.manifest()
112 112 return mb.filesnotin(ma, match=match)
113 113
114 114
115 115 def usechangesetcentricalgo(repo):
116 116 """Checks if we should use changeset-centric copy algorithms"""
117 117 if repo.filecopiesmode == b'changeset-sidedata':
118 118 return True
119 119 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 120 changesetsource = (b'changeset-only', b'compatibility')
121 121 return readfrom in changesetsource
122 122
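The value read above is an ordinary hgrc knob; a hypothetical stanza opting into changeset-centric copy data would look like:

```
[experimental]
copies.read-from = changeset-only
```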
123 123
124 124 def _committedforwardcopies(a, b, base, match):
125 125 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 126 # files might have to be traced back to the fctx parent of the last
127 127 # one-side-only changeset, but not further back than that
128 128 repo = a._repo
129 129
130 130 if usechangesetcentricalgo(repo):
131 131 return _changesetforwardcopies(a, b, match)
132 132
133 133 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 134 dbg = repo.ui.debug
135 135 if debug:
136 136 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 137 am = a.manifest()
138 138 basemf = None if base is None else base.manifest()
139 139
140 140 # find where new files came from
141 141 # we currently don't try to find where old files went, too expensive
142 142 # this means we can miss a case like 'hg rm b; hg cp a b'
143 143 cm = {}
144 144
145 145 # Computing the forward missing is quite expensive on large manifests, since
146 146 # it compares the entire manifests. We can optimize it in the common use
147 147 # case of computing what copies are in a commit versus its parent (like
148 148 # during a rebase or histedit). Note, we exclude merge commits from this
149 149 # optimization, since the ctx.files() for a merge commit is not correct for
150 150 # this comparison.
151 151 forwardmissingmatch = match
152 152 if b.p1() == a and b.p2().node() == nullid:
153 153 filesmatcher = matchmod.exact(b.files())
154 154 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 155 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
156 156 missing = list(b.walk(match))
157 157 # _computeforwardmissing(a, b, match=forwardmissingmatch)
158 158 if debug:
159 159 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
160 160 else:
161 161 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
162 162 if debug:
163 163 dbg(
164 164 b'debug.copies: missing files to search: %d\n'
165 165 % len(missing)
166 166 )
167 167
168 168 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
169 169
170 170 for f in sorted(missing):
171 171 if debug:
172 172 dbg(b'debug.copies: tracing file: %s\n' % f)
173 173 fctx = b[f]
174 174 fctx._ancestrycontext = ancestrycontext
175 175
176 176 if debug:
177 177 start = util.timer()
178 178 opath = _tracefile(fctx, am, basemf)
179 179 if opath:
180 180 if debug:
181 181 dbg(b'debug.copies: rename of: %s\n' % opath)
182 182 cm[f] = opath
183 183 if debug:
184 184 dbg(
185 185 b'debug.copies: time: %f seconds\n'
186 186 % (util.timer() - start)
187 187 )
188 188 return cm
189 189
190 190
191 191 def _revinfo_getter(repo, match):
192 192 """returns a function that returns the following data given a <rev>"
193 193
194 194 * p1: revision number of first parent
195 195 * p2: revision number of second parent
196 196 * changes: a ChangingFiles object
197 197 """
198 198 cl = repo.changelog
199 199 parents = cl.parentrevs
200 200 flags = cl.flags
201 201
202 202 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
203 203
204 204 changelogrevision = cl.changelogrevision
205 205
206 206 if rustmod is not None:
207 207
208 208 def revinfo(rev):
209 209 p1, p2 = parents(rev)
210 210 if flags(rev) & HASCOPIESINFO:
211 211 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
212 212 else:
213 213 raw = None
214 214 return (p1, p2, raw)
215 215
216 216 else:
217 217
218 218 def revinfo(rev):
219 219 p1, p2 = parents(rev)
220 220 if flags(rev) & HASCOPIESINFO:
221 221 changes = changelogrevision(rev).changes
222 222 else:
223 223 changes = None
224 224 return (p1, p2, changes)
225 225
226 226 return revinfo
227 227
228 228
229 229 def cached_is_ancestor(is_ancestor):
230 230 """return a cached version of is_ancestor"""
231 231 cache = {}
232 232
233 233 def _is_ancestor(anc, desc):
234 234 if anc > desc:
235 235 return False
236 236 elif anc == desc:
237 237 return True
238 238 key = (anc, desc)
239 239 ret = cache.get(key)
240 240 if ret is None:
241 241 ret = cache[key] = is_ancestor(anc, desc)
242 242 return ret
243 243
244 244 return _is_ancestor
245 245
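A short usage sketch (with a hypothetical predicate) showing what the wrapper buys us:

```
calls = []

def slow_is_ancestor(anc, desc):
    calls.append((anc, desc))   # stand-in for an expensive graph walk
    return True

fast = cached_is_ancestor(slow_is_ancestor)
fast(1, 5)
fast(1, 5)                      # second call is served from the cache
assert calls == [(1, 5)]
assert fast(3, 3) is True       # equal revisions short-circuit
assert fast(7, 2) is False      # anc > desc can never be an ancestor
```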
246 246
247 247 def _changesetforwardcopies(a, b, match):
248 248 if a.rev() in (nullrev, b.rev()):
249 249 return {}
250 250
251 251 repo = a.repo().unfiltered()
252 252 children = {}
253 253
254 254 cl = repo.changelog
255 255 isancestor = cl.isancestorrev
256 256
257 257 # To track a rename from "A" to "B", we need to gather all parent → children
258 258 # edges that are contained in `::B` but not in `::A`.
259 259 #
260 260 #
261 261 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
262 262 # ::a`) and also all the "root points", ie the parents of the exclusive set
263 263 # that belong to ::a. These are exactly all the revisions needed to express
264 264 # the parent → children edges we need to combine.
265 265 #
266 266 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
267 267 # excluding paths that lead to roots that are not ancestors of `a`. We
268 268 # keep this out of the explanation because it is hard enough without this special case.
269 269
270 270 parents = cl._uncheckedparentrevs
271 271 graph_roots = (nullrev, nullrev)
272 272
273 273 ancestors = cl.ancestors([a.rev()], inclusive=True)
274 274 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
275 275 roots = set()
276 276 has_graph_roots = False
277 277
278 278 # iterate over `only(B, A)`
279 279 for r in revs:
280 280 ps = parents(r)
281 281 if ps == graph_roots:
282 282 has_graph_roots = True
283 283 else:
284 284 p1, p2 = ps
285 285
286 286 # find all the "root points" (see larger comment above)
287 287 if p1 != nullrev and p1 in ancestors:
288 288 roots.add(p1)
289 289 if p2 != nullrev and p2 in ancestors:
290 290 roots.add(p2)
291 291 if not roots:
292 292 # no common revision to track copies from
293 293 return {}
294 294 if has_graph_roots:
295 295 # this deals with the special case mentioned in the [1] footnote. We
296 296 # must filter out revisions that lead to non-common graphroots.
297 297 roots = list(roots)
298 298 m = min(roots)
299 299 h = [b.rev()]
300 300 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
301 301 roots_to_head = set(roots_to_head)
302 302 revs = [r for r in revs if r in roots_to_head]
303 303
304 304 if repo.filecopiesmode == b'changeset-sidedata':
305 305 # When using side-data, we will process the edges "from" the children.
306 306 # We iterate over the children, gathering previously collected data for
307 307 # the parents. To know when the parents' data is no longer necessary, we
308 308 # keep a counter of how many children each revision has.
309 309 #
310 310 # An interesting property of `children_count` is that it only contains
311 311 # revisions that will be relevant for an edge of the graph. So if a
312 312 # child has a parent not in `children_count`, that edge should not be
313 313 # processed.
314 314 children_count = dict((r, 0) for r in roots)
315 315 for r in revs:
316 316 for p in cl.parentrevs(r):
317 317 if p == nullrev:
318 318 continue
319 319 children_count[r] = 0
320 320 if p in children_count:
321 321 children_count[p] += 1
322 322 revinfo = _revinfo_getter(repo, match)
323 323 return _combine_changeset_copies(
324 324 revs, children_count, b.rev(), revinfo, match, isancestor
325 325 )
326 326 else:
327 327 # When not using side-data, we will process the edges "from" the parent.
328 328 # so we need a full mapping of the parent -> children relation.
329 329 children = dict((r, []) for r in roots)
330 330 for r in revs:
331 331 for p in cl.parentrevs(r):
332 332 if p == nullrev:
333 333 continue
334 334 children[r] = []
335 335 if p in children:
336 336 children[p].append(r)
337 337 x = revs.pop()
338 338 assert x == b.rev()
339 339 revs.extend(roots)
340 340 revs.sort()
341 341
342 342 revinfo = _revinfo_getter_extra(repo)
343 343 return _combine_changeset_copies_extra(
344 344 revs, children, b.rev(), revinfo, match, isancestor
345 345 )
346 346
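A toy illustration (hypothetical four-revision graph, not repository code) of how the `children_count` bookkeeping from the side-data branch above ends up:

```
nullrev = -1
# revisions 1 and 2 both have parent 0; revision 3 merges 1 and 2
parentrevs = {1: (0, nullrev), 2: (0, nullrev), 3: (1, 2)}.__getitem__
roots, revs = {0}, [1, 2, 3]

children_count = dict((r, 0) for r in roots)
for r in revs:
    for p in parentrevs(r):
        if p == nullrev:
            continue
        children_count[r] = 0
        if p in children_count:
            children_count[p] += 1

# revision 0's data can be dropped once its second child has been processed
assert children_count == {0: 2, 1: 1, 2: 1, 3: 0}
```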
347 347
348 348 def _combine_changeset_copies(
349 349 revs, children_count, targetrev, revinfo, match, isancestor
350 350 ):
351 351 """combine the copies information for each item of iterrevs
352 352
353 353 revs: sorted iterable of revision to visit
354 354 children_count: a {parent: <number-of-relevant-children>} mapping.
355 355 targetrev: the final copies destination revision (not in iterrevs)
356 356 revinfo(rev): a function that return (p1, p2, p1copies, p2copies, removed)
357 357 match: a matcher
358 358
359 359 It returns the aggregated copies information for `targetrev`.
360 360 """
361 361
362 362 alwaysmatch = match.always()
363 363
364 364 if rustmod is not None:
365 365 final_copies = rustmod.combine_changeset_copies(
366 366 list(revs), children_count, targetrev, revinfo, isancestor
367 367 )
368 368 else:
369 369 isancestor = cached_is_ancestor(isancestor)
370 370
371 371 all_copies = {}
372 372 # iterate over the "children" side of each copy tracing "edge"
373 373 for current_rev in revs:
374 374 p1, p2, changes = revinfo(current_rev)
375 375 current_copies = None
376 376 # iterate over all parents to chain the existing data with the
377 377 # data from the parent → child edge.
378 378 for parent, parent_rev in ((1, p1), (2, p2)):
379 379 if parent_rev == nullrev:
380 380 continue
381 381 remaining_children = children_count.get(parent_rev)
382 382 if remaining_children is None:
383 383 continue
384 384 remaining_children -= 1
385 385 children_count[parent_rev] = remaining_children
386 386 if remaining_children:
387 387 copies = all_copies.get(parent_rev, None)
388 388 else:
389 389 copies = all_copies.pop(parent_rev, None)
390 390
391 391 if copies is None:
392 392 # this is a root
393 393 newcopies = copies = {}
394 394 elif remaining_children:
395 395 newcopies = copies.copy()
396 396 else:
397 397 newcopies = copies
398 398 # chain the data in the edge with the existing data
399 399 if changes is not None:
400 400 childcopies = {}
401 401 if parent == 1:
402 402 childcopies = changes.copied_from_p1
403 403 elif parent == 2:
404 404 childcopies = changes.copied_from_p2
405 405
406 406 if childcopies:
407 407 newcopies = copies.copy()
408 408 for dest, source in pycompat.iteritems(childcopies):
409 409 prev = copies.get(source)
410 410 if prev is not None and prev[1] is not None:
411 411 source = prev[1]
412 412 newcopies[dest] = (current_rev, source)
413 413 assert newcopies is not copies
414 414 if changes.removed:
415 415 for f in changes.removed:
416 416 if f in newcopies:
417 417 if newcopies is copies:
418 418 # copy on write to avoid affecting potential other
419 419 # branches. when there are no other branches, this
420 420 # could be avoided.
421 421 newcopies = copies.copy()
422 422 newcopies[f] = (current_rev, None)
423 423 # check potential need to combine the data from another parent (for
424 424 # that child). See comment below for details.
425 425 if current_copies is None:
426 426 current_copies = newcopies
427 427 else:
428 428 # we are the second parent to work on c, we need to merge our
429 429 # work with the other.
430 430 #
431 431 # In case of conflict, parent 1 takes precedence over parent 2.
432 432 # This is an arbitrary choice made anew when implementing
433 433 # changeset-based copies. It was made without regard to
434 434 # potential filelog-related behavior.
435 435 assert parent == 2
436 436 current_copies = _merge_copies_dict(
437 437 newcopies, current_copies, isancestor, changes
438 438 )
439 439 all_copies[current_rev] = current_copies
440 440
441 441 # filter out internal details and return a {dest: source mapping}
442 442 final_copies = {}
443 443 for dest, (tt, source) in all_copies[targetrev].items():
444 444 if source is not None:
445 445 final_copies[dest] = source
446 446 if not alwaysmatch:
447 447 for filename in list(final_copies.keys()):
448 448 if not match(filename):
449 449 del final_copies[filename]
450 450 return final_copies
451 451
452 452
453 453 # constants to decide which side to pick in _merge_copies_dict
454 454 PICK_MINOR = 0
455 455 PICK_MAJOR = 1
456 456 PICK_EITHER = 2
457 457
458 458
459 459 def _merge_copies_dict(minor, major, isancestor, changes):
460 460 """merge two copies-mapping together, minor and major
461 461
462 462 In case of conflict, value from "major" will be picked.
463 463
464 464 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
465 465 ancestors of `high_rev`,
466 466
467 467 - `ismerged(path)`: callable return True if `path` have been merged in the
468 468 current revision,
469 469
470 470 return the resulting dict (in practice, the "minor" object, updated)
471 471 """
472 472 for dest, value in major.items():
473 473 other = minor.get(dest)
474 474 if other is None:
475 475 minor[dest] = value
476 476 else:
477 477 pick = _compare_values(changes, isancestor, dest, other, value)
478 478 if pick == PICK_MAJOR:
479 479 minor[dest] = value
480 480 return minor
481 481
482 482
483 483 def _compare_values(changes, isancestor, dest, minor, major):
484 """compare two value within a _merge_copies_dict loop iteration"""
484 """compare two value within a _merge_copies_dict loop iteration
485
486 return pick
487
488 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
489 """
485 490 major_tt, major_value = major
486 491 minor_tt, minor_value = minor
487 492
488 # evacuate some simple case first:
489 493 if major_tt == minor_tt:
490 494 # if it comes from the same revision it must be the same value
491 495 assert major_value == minor_value
492 496 return PICK_EITHER
493 elif major[1] == minor[1]:
494 return PICK_EITHER
495
496 # actual merging needed: content from "major" wins, unless it is older than
497 # the branch point or there is a merge
498 elif changes is not None and major[1] is None and dest in changes.salvaged:
497 elif (
498 changes is not None
499 and minor_value is not None
500 and major_value is None
501 and dest in changes.salvaged
502 ):
503 # In this case, a deletion was reverted: the "alive" value overwrites
504 # the deleted one.
499 505 return PICK_MINOR
500 elif changes is not None and minor[1] is None and dest in changes.salvaged:
501 return PICK_MAJOR
502 elif changes is not None and dest in changes.merged:
503 return PICK_MAJOR
504 elif not isancestor(major_tt, minor_tt):
505 if major[1] is not None:
506 elif (
507 changes is not None
508 and major_value is not None
509 and minor_value is None
510 and dest in changes.salvaged
511 ):
512 # In this case, a deletion was reverted: the "alive" value overwrites
513 # the deleted one.
506 514 return PICK_MAJOR
507 515 elif isancestor(minor_tt, major_tt):
516 if changes is not None and dest in changes.merged:
517 # change to dest happened on the branch without copy-source change,
518 # so both sources are valid and "major" wins.
508 519 return PICK_MAJOR
520 else:
521 return PICK_MAJOR
522 elif isancestor(major_tt, minor_tt):
523 if changes is not None and dest in changes.merged:
524 # change to dest happened on the branch without copy-source change,
525 # so both sources are valid and "major" wins.
526 return PICK_MAJOR
527 else:
509 528 return PICK_MINOR
529 elif minor_value is None:
530 # in case of conflict, the "alive" side wins.
531 return PICK_MAJOR
532 elif major_value is None:
533 # in case of conflict, the "alive" side wins.
534 return PICK_MINOR
535 else:
536 # in case of conflict where both sides are alive, major wins.
537 return PICK_MAJOR
510 538
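A hypothetical walkthrough of the rearranged conditionals above, stubbing the ChangingFiles object with just the attributes `_compare_values` reads:

```
from types import SimpleNamespace

changes = SimpleNamespace(salvaged={b'f'}, merged=set())
is_ancestor = lambda anc, desc: anc <= desc

minor = (2, b'source')   # recorded at revision 2, file alive
major = (5, None)        # revision 5 recorded a deletion of b'f'
# b'f' was salvaged: the deletion loses and the "alive" side is picked,
# even though "major" would normally win.
assert _compare_values(changes, is_ancestor, b'f', minor, major) == PICK_MINOR
```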
511 539
512 540 def _revinfo_getter_extra(repo):
513 541 """return a function that return multiple data given a <rev>"i
514 542
515 543 * p1: revision number of first parent
516 544 * p2: revision number of second parent
517 545 * p1copies: mapping of copies from p1
518 546 * p2copies: mapping of copies from p2
519 547 * removed: a list of removed files
520 548 * ismerged: a callback to know if file was merged in that revision
521 549 """
522 550 cl = repo.changelog
523 551 parents = cl.parentrevs
524 552
525 553 def get_ismerged(rev):
526 554 ctx = repo[rev]
527 555
528 556 def ismerged(path):
529 557 if path not in ctx.files():
530 558 return False
531 559 fctx = ctx[path]
532 560 parents = fctx._filelog.parents(fctx._filenode)
533 561 nb_parents = 0
534 562 for n in parents:
535 563 if n != nullid:
536 564 nb_parents += 1
537 565 return nb_parents >= 2
538 566
539 567 return ismerged
540 568
541 569 def revinfo(rev):
542 570 p1, p2 = parents(rev)
543 571 ctx = repo[rev]
544 572 p1copies, p2copies = ctx._copies
545 573 removed = ctx.filesremoved()
546 574 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
547 575
548 576 return revinfo
549 577
550 578
551 579 def _combine_changeset_copies_extra(
552 580 revs, children, targetrev, revinfo, match, isancestor
553 581 ):
554 582 """version of `_combine_changeset_copies` that works with the Google
555 583 specific "extra" based storage for copy information"""
556 584 all_copies = {}
557 585 alwaysmatch = match.always()
558 586 for r in revs:
559 587 copies = all_copies.pop(r, None)
560 588 if copies is None:
561 589 # this is a root
562 590 copies = {}
563 591 for i, c in enumerate(children[r]):
564 592 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
565 593 if r == p1:
566 594 parent = 1
567 595 childcopies = p1copies
568 596 else:
569 597 assert r == p2
570 598 parent = 2
571 599 childcopies = p2copies
572 600 if not alwaysmatch:
573 601 childcopies = {
574 602 dst: src for dst, src in childcopies.items() if match(dst)
575 603 }
576 604 newcopies = copies
577 605 if childcopies:
578 606 newcopies = copies.copy()
579 607 for dest, source in pycompat.iteritems(childcopies):
580 608 prev = copies.get(source)
581 609 if prev is not None and prev[1] is not None:
582 610 source = prev[1]
583 611 newcopies[dest] = (c, source)
584 612 assert newcopies is not copies
585 613 for f in removed:
586 614 if f in newcopies:
587 615 if newcopies is copies:
588 616 # copy on write to avoid affecting potential other
589 617 # branches. when there are no other branches, this
590 618 # could be avoided.
591 619 newcopies = copies.copy()
592 620 newcopies[f] = (c, None)
593 621 othercopies = all_copies.get(c)
594 622 if othercopies is None:
595 623 all_copies[c] = newcopies
596 624 else:
597 625 # we are the second parent to work on c, we need to merge our
598 626 # work with the other.
599 627 #
600 628 # In case of conflict, parent 1 takes precedence over parent 2.
601 629 # This is an arbitrary choice made anew when implementing
602 630 # changeset-based copies. It was made without regard to
603 631 # potential filelog-related behavior.
604 632 if parent == 1:
605 633 _merge_copies_dict_extra(
606 634 othercopies, newcopies, isancestor, ismerged
607 635 )
608 636 else:
609 637 _merge_copies_dict_extra(
610 638 newcopies, othercopies, isancestor, ismerged
611 639 )
612 640 all_copies[c] = newcopies
613 641
614 642 final_copies = {}
615 643 for dest, (tt, source) in all_copies[targetrev].items():
616 644 if source is not None:
617 645 final_copies[dest] = source
618 646 return final_copies
619 647
620 648
621 649 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
622 650 """version of `_merge_copies_dict` that works with the Google
623 651 specific "extra" based storage for copy information"""
624 652 for dest, value in major.items():
625 653 other = minor.get(dest)
626 654 if other is None:
627 655 minor[dest] = value
628 656 else:
629 657 new_tt = value[0]
630 658 other_tt = other[0]
631 659 if value[1] == other[1]:
632 660 continue
633 661 # content from "major" wins, unless it is older
634 662 # than the branch point or there is a merge
635 663 if (
636 664 new_tt == other_tt
637 665 or not isancestor(new_tt, other_tt)
638 666 or ismerged(dest)
639 667 ):
640 668 minor[dest] = value
641 669
642 670
643 671 def _forwardcopies(a, b, base=None, match=None):
644 672 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
645 673
646 674 if base is None:
647 675 base = a
648 676 match = a.repo().narrowmatch(match)
649 677 # check for working copy
650 678 if b.rev() is None:
651 679 cm = _committedforwardcopies(a, b.p1(), base, match)
652 680 # combine copies from dirstate if necessary
653 681 copies = _chain(cm, _dirstatecopies(b._repo, match))
654 682 else:
655 683 copies = _committedforwardcopies(a, b, base, match)
656 684 return copies
657 685
658 686
659 687 def _backwardrenames(a, b, match):
660 688 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
661 689 return {}
662 690
663 691 # Even though we're not taking copies into account, 1:n rename situations
664 692 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
665 693 # arbitrarily pick one of the renames.
666 694 # We don't want to pass in "match" here, since that would filter
667 695 # the destination by it. Since we're reversing the copies, we want
668 696 # to filter the source instead.
669 697 f = _forwardcopies(b, a)
670 698 r = {}
671 699 for k, v in sorted(pycompat.iteritems(f)):
672 700 if match and not match(v):
673 701 continue
674 702 # remove copies
675 703 if v in a:
676 704 continue
677 705 r[v] = k
678 706 return r
679 707
680 708
681 709 def pathcopies(x, y, match=None):
682 710 """find {dst@y: src@x} copy mapping for directed compare"""
683 711 repo = x._repo
684 712 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
685 713 if debug:
686 714 repo.ui.debug(
687 715 b'debug.copies: searching copies from %s to %s\n' % (x, y)
688 716 )
689 717 if x == y or not x or not y:
690 718 return {}
691 719 if y.rev() is None and x == y.p1():
692 720 if debug:
693 721 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
694 722 # short-circuit to avoid issues with merge states
695 723 return _dirstatecopies(repo, match)
696 724 a = y.ancestor(x)
697 725 if a == x:
698 726 if debug:
699 727 repo.ui.debug(b'debug.copies: search mode: forward\n')
700 728 copies = _forwardcopies(x, y, match=match)
701 729 elif a == y:
702 730 if debug:
703 731 repo.ui.debug(b'debug.copies: search mode: backward\n')
704 732 copies = _backwardrenames(x, y, match=match)
705 733 else:
706 734 if debug:
707 735 repo.ui.debug(b'debug.copies: search mode: combined\n')
708 736 base = None
709 737 if a.rev() != nullrev:
710 738 base = x
711 739 copies = _chain(
712 740 _backwardrenames(x, a, match=match),
713 741 _forwardcopies(a, y, base, match=match),
714 742 )
715 743 _filter(x, y, copies)
716 744 return copies
717 745
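Hypothetical usage, assuming `repo` is an already-opened localrepository object:

```
# compare the working directory against its first parent; this takes the
# dirstate short-circuit described in pathcopies() below
copies = pathcopies(repo[b'.'], repo[None])   # {dst: src} mapping
```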
718 746
719 747 def mergecopies(repo, c1, c2, base):
720 748 """
721 749 Finds moves and copies between context c1 and c2 that are relevant for
722 750 merging. 'base' will be used as the merge base.
723 751
724 752 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
725 753 files that were moved/copied in one merge parent and modified in another.
726 754 For example:
727 755
728 756 o ---> 4 another commit
729 757 |
730 758 | o ---> 3 commit that modifies a.txt
731 759 | /
732 760 o / ---> 2 commit that moves a.txt to b.txt
733 761 |/
734 762 o ---> 1 merge base
735 763
736 764 If we try to rebase revision 3 onto revision 4, since there is no a.txt in
737 765 revision 4, and if the user has copytrace disabled, we print the following
738 766 message:
739 767
740 768 ```other changed <file> which local deleted```
741 769
742 770 Returns a tuple where:
743 771
744 772 "branch_copies" an instance of branch_copies.
745 773
746 774 "diverge" is a mapping of source name -> list of destination names
747 775 for divergent renames.
748 776
749 777 This function calls different copytracing algorithms based on config.
750 778 """
751 779 # avoid silly behavior for update from empty dir
752 780 if not c1 or not c2 or c1 == c2:
753 781 return branch_copies(), branch_copies(), {}
754 782
755 783 narrowmatch = c1.repo().narrowmatch()
756 784
757 785 # avoid silly behavior for parent -> working dir
758 786 if c2.node() is None and c1.node() == repo.dirstate.p1():
759 787 return (
760 788 branch_copies(_dirstatecopies(repo, narrowmatch)),
761 789 branch_copies(),
762 790 {},
763 791 )
764 792
765 793 copytracing = repo.ui.config(b'experimental', b'copytrace')
766 794 if stringutil.parsebool(copytracing) is False:
767 795 # stringutil.parsebool() returns None when it is unable to parse the
768 796 # value; in that case we keep copytracing enabled
769 797 return branch_copies(), branch_copies(), {}
770 798
771 799 if usechangesetcentricalgo(repo):
772 800 # The heuristics don't make sense when we need changeset-centric algos
773 801 return _fullcopytracing(repo, c1, c2, base)
774 802
775 803 # Copy trace disabling is explicitly below the node == p1 logic above
776 804 # because the logic above is required for a simple copy to be kept across a
777 805 # rebase.
778 806 if copytracing == b'heuristics':
779 807 # Do full copytracing if only non-public revisions are involved as
780 808 # that will be fast enough and will also cover the copies which could
781 809 # be missed by heuristics
782 810 if _isfullcopytraceable(repo, c1, base):
783 811 return _fullcopytracing(repo, c1, c2, base)
784 812 return _heuristicscopytracing(repo, c1, c2, base)
785 813 else:
786 814 return _fullcopytracing(repo, c1, c2, base)
787 815
788 816
789 817 def _isfullcopytraceable(repo, c1, base):
790 818 """Checks that if base, source and destination are all no-public branches,
791 819 if yes let's use the full copytrace algorithm for increased capabilities
792 820 since it will be fast enough.
793 821
794 822 `experimental.copytrace.sourcecommitlimit` can be used to set a limit on the
795 823 number of changesets from c1 to base; if the number of changesets is more
796 824 than the limit, the full copytracing algorithm won't be used.
797 825 """
798 826 if c1.rev() is None:
799 827 c1 = c1.p1()
800 828 if c1.mutable() and base.mutable():
801 829 sourcecommitlimit = repo.ui.configint(
802 830 b'experimental', b'copytrace.sourcecommitlimit'
803 831 )
804 832 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
805 833 return commits < sourcecommitlimit
806 834 return False
807 835
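The limit mentioned in the docstring is an ordinary config knob; a hypothetical hgrc stanza (the value shown is illustrative, not a default):

```
[experimental]
copytrace = heuristics
copytrace.sourcecommitlimit = 50
```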
808 836
809 837 def _checksinglesidecopies(
810 838 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
811 839 ):
812 840 if src not in m2:
813 841 # deleted on side 2
814 842 if src not in m1:
815 843 # renamed on side 1, deleted on side 2
816 844 renamedelete[src] = dsts1
817 845 elif src not in mb:
818 846 # Work around the "short-circuit to avoid issues with merge states"
819 847 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
820 848 # destination doesn't exist in y.
821 849 pass
822 850 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
823 851 return
824 852 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
825 853 # modified on side 2
826 854 for dst in dsts1:
827 855 copy[dst] = src
828 856
829 857
830 858 class branch_copies(object):
831 859 """Information about copies made on one side of a merge/graft.
832 860
833 861 "copy" is a mapping from destination name -> source name,
834 862 where source is in c1 and destination is in c2 or vice-versa.
835 863
836 864 "movewithdir" is a mapping from source name -> destination name,
837 865 where the file at source, present in one context but not the other,
838 866 needs to be moved to destination by the merge process, because the
839 867 other context moved the directory it is in.
840 868
841 869 "renamedelete" is a mapping of source name -> list of destination
842 870 names for files deleted in c1 that were renamed in c2 or vice-versa.
843 871
844 872 "dirmove" is a mapping of detected source dir -> destination dir renames.
845 873 This is needed for handling changes to new files previously grafted into
846 874 renamed directories.
847 875 """
848 876
849 877 def __init__(
850 878 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
851 879 ):
852 880 self.copy = {} if copy is None else copy
853 881 self.renamedelete = {} if renamedelete is None else renamedelete
854 882 self.dirmove = {} if dirmove is None else dirmove
855 883 self.movewithdir = {} if movewithdir is None else movewithdir
856 884
857 885 def __repr__(self):
858 886 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
859 887 self.copy,
860 888 self.renamedelete,
861 889 self.dirmove,
862 890 self.movewithdir,
863 891 )
864 892
865 893
866 894 def _fullcopytracing(repo, c1, c2, base):
867 895 """The full copytracing algorithm which finds all the new files that were
868 896 added from merge base up to the top commit and for each file it checks if
869 897 this file was copied from another file.
870 898
871 899 This is pretty slow when a lot of changesets are involved but will track all
872 900 the copies.
873 901 """
874 902 m1 = c1.manifest()
875 903 m2 = c2.manifest()
876 904 mb = base.manifest()
877 905
878 906 copies1 = pathcopies(base, c1)
879 907 copies2 = pathcopies(base, c2)
880 908
881 909 if not (copies1 or copies2):
882 910 return branch_copies(), branch_copies(), {}
883 911
884 912 inversecopies1 = {}
885 913 inversecopies2 = {}
886 914 for dst, src in copies1.items():
887 915 inversecopies1.setdefault(src, []).append(dst)
888 916 for dst, src in copies2.items():
889 917 inversecopies2.setdefault(src, []).append(dst)
890 918
891 919 copy1 = {}
892 920 copy2 = {}
893 921 diverge = {}
894 922 renamedelete1 = {}
895 923 renamedelete2 = {}
896 924 allsources = set(inversecopies1) | set(inversecopies2)
897 925 for src in allsources:
898 926 dsts1 = inversecopies1.get(src)
899 927 dsts2 = inversecopies2.get(src)
900 928 if dsts1 and dsts2:
901 929 # copied/renamed on both sides
902 930 if src not in m1 and src not in m2:
903 931 # renamed on both sides
904 932 dsts1 = set(dsts1)
905 933 dsts2 = set(dsts2)
906 934 # If there's some overlap in the rename destinations, we
907 935 # consider it not divergent. For example, if side 1 copies 'a'
908 936 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
909 937 # and 'd' and deletes 'a'.
910 938 if dsts1 & dsts2:
911 939 for dst in dsts1 & dsts2:
912 940 copy1[dst] = src
913 941 copy2[dst] = src
914 942 else:
915 943 diverge[src] = sorted(dsts1 | dsts2)
916 944 elif src in m1 and src in m2:
917 945 # copied on both sides
918 946 dsts1 = set(dsts1)
919 947 dsts2 = set(dsts2)
920 948 for dst in dsts1 & dsts2:
921 949 copy1[dst] = src
922 950 copy2[dst] = src
923 951 # TODO: Handle cases where it was renamed on one side and copied
924 952 # on the other side
925 953 elif dsts1:
926 954 # copied/renamed only on side 1
927 955 _checksinglesidecopies(
928 956 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
929 957 )
930 958 elif dsts2:
931 959 # copied/renamed only on side 2
932 960 _checksinglesidecopies(
933 961 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
934 962 )
935 963
936 964 # find interesting file sets from manifests
937 965 cache = []
938 966
939 967 def _get_addedfiles(idx):
940 968 if not cache:
941 969 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
942 970 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
943 971 u1 = sorted(addedinm1 - addedinm2)
944 972 u2 = sorted(addedinm2 - addedinm1)
945 973 cache.extend((u1, u2))
946 974 return cache[idx]
947 975
948 976 u1fn = lambda: _get_addedfiles(0)
949 977 u2fn = lambda: _get_addedfiles(1)
950 978 if repo.ui.debugflag:
951 979 u1 = u1fn()
952 980 u2 = u2fn()
953 981
954 982 header = b" unmatched files in %s"
955 983 if u1:
956 984 repo.ui.debug(
957 985 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
958 986 )
959 987 if u2:
960 988 repo.ui.debug(
961 989 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
962 990 )
963 991
964 992 renamedeleteset = set()
965 993 divergeset = set()
966 994 for dsts in diverge.values():
967 995 divergeset.update(dsts)
968 996 for dsts in renamedelete1.values():
969 997 renamedeleteset.update(dsts)
970 998 for dsts in renamedelete2.values():
971 999 renamedeleteset.update(dsts)
972 1000
973 1001 repo.ui.debug(
974 1002 b" all copies found (* = to merge, ! = divergent, "
975 1003 b"% = renamed and deleted):\n"
976 1004 )
977 1005 for side, copies in ((b"local", copies1), (b"remote", copies2)):
978 1006 if not copies:
979 1007 continue
980 1008 repo.ui.debug(b" on %s side:\n" % side)
981 1009 for f in sorted(copies):
982 1010 note = b""
983 1011 if f in copy1 or f in copy2:
984 1012 note += b"*"
985 1013 if f in divergeset:
986 1014 note += b"!"
987 1015 if f in renamedeleteset:
988 1016 note += b"%"
989 1017 repo.ui.debug(
990 1018 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
991 1019 )
992 1020 del renamedeleteset
993 1021 del divergeset
994 1022
995 1023 repo.ui.debug(b" checking for directory renames\n")
996 1024
997 1025 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
998 1026 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
999 1027
1000 1028 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1001 1029 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1002 1030
1003 1031 return branch_copies1, branch_copies2, diverge
1004 1032
1005 1033
1006 1034 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1007 1035 """Finds moved directories and files that should move with them.
1008 1036
1009 1037 ctx: the context for one of the sides
1010 1038 copy: files copied on the same side (as ctx)
1011 1039 fullcopy: files copied on the same side (as ctx), including those that
1012 1040 merge.manifestmerge() won't care about
1013 1041 addedfilesfn: function returning added files on the other side (compared to
1014 1042 ctx)
1015 1043 """
1016 1044 # generate a directory move map
1017 1045 invalid = set()
1018 1046 dirmove = {}
1019 1047
1020 1048 # examine each file copy for a potential directory move, which is
1021 1049 # when all the files in a directory are moved to a new directory
1022 1050 for dst, src in pycompat.iteritems(fullcopy):
1023 1051 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1024 1052 if dsrc in invalid:
1025 1053 # already seen to be uninteresting
1026 1054 continue
1027 1055 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1028 1056 # directory wasn't entirely moved locally
1029 1057 invalid.add(dsrc)
1030 1058 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1031 1059 # files from the same directory moved to two different places
1032 1060 invalid.add(dsrc)
1033 1061 else:
1034 1062 # looks good so far
1035 1063 dirmove[dsrc] = ddst
1036 1064
1037 1065 for i in invalid:
1038 1066 if i in dirmove:
1039 1067 del dirmove[i]
1040 1068 del invalid
1041 1069
1042 1070 if not dirmove:
1043 1071 return {}, {}
1044 1072
1045 1073 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1046 1074
1047 1075 for d in dirmove:
1048 1076 repo.ui.debug(
1049 1077 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1050 1078 )
1051 1079
1052 1080 movewithdir = {}
1053 1081 # check unaccounted nonoverlapping files against directory moves
1054 1082 for f in addedfilesfn():
1055 1083 if f not in fullcopy:
1056 1084 for d in dirmove:
1057 1085 if f.startswith(d):
1058 1086 # new file added in a directory that was moved, move it
1059 1087 df = dirmove[d] + f[len(d) :]
1060 1088 if df not in copy:
1061 1089 movewithdir[f] = df
1062 1090 repo.ui.debug(
1063 1091 b" pending file src: '%s' -> dst: '%s'\n"
1064 1092 % (f, df)
1065 1093 )
1066 1094 break
1067 1095
1068 1096 return dirmove, movewithdir
1069 1097
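A small hypothetical illustration of the resulting maps: if every file of dir1/ went to dir2/, the directory rename is recorded and a file added under dir1/ on the other side moves along:

```
fullcopy = {b'dir2/a': b'dir1/a', b'dir2/b': b'dir1/b'}
# assuming the scan above found no invalid entries, with slashes appended:
dirmove = {b'dir1/': b'dir2/'}

f = b'dir1/new.txt'              # added on the other side, not in fullcopy
d = b'dir1/'
df = dirmove[d] + f[len(d):]
assert df == b'dir2/new.txt'     # candidate for movewithdir
```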
1070 1098
1071 1099 def _heuristicscopytracing(repo, c1, c2, base):
1072 1100 """Fast copytracing using filename heuristics
1073 1101
1074 1102 Assumes that moves or renames are of the following two types:
1075 1103
1076 1104 1) Inside a directory only (same directory name but different filenames)
1077 1105 2) Move from one directory to another
1078 1106 (same filenames but different directory names)
1079 1107
1080 1108 Works only when there are no merge commits in the "source branch".
1081 1109 The source branch is the set of commits from base up to c2, not including base.
1082 1110
1083 1111 If a merge is involved, it falls back to _fullcopytracing().
1084 1112
1085 1113 Can be used by setting the following config:
1086 1114
1087 1115 [experimental]
1088 1116 copytrace = heuristics
1089 1117
1090 1118 In some cases the copy/move candidates found by the heuristics can be very
1091 1119 large in number, which will make the algorithm slow. The number of possible
1092 1120 candidates to check can be limited by using the config
1093 1121 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1094 1122 """
1095 1123
1096 1124 if c1.rev() is None:
1097 1125 c1 = c1.p1()
1098 1126 if c2.rev() is None:
1099 1127 c2 = c2.p1()
1100 1128
1101 1129 changedfiles = set()
1102 1130 m1 = c1.manifest()
1103 1131 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1104 1132 # If base is not in c2 branch, we switch to fullcopytracing
1105 1133 repo.ui.debug(
1106 1134 b"switching to full copytracing as base is not "
1107 1135 b"an ancestor of c2\n"
1108 1136 )
1109 1137 return _fullcopytracing(repo, c1, c2, base)
1110 1138
1111 1139 ctx = c2
1112 1140 while ctx != base:
1113 1141 if len(ctx.parents()) == 2:
1114 1142 # To keep things simple let's not handle merges
1115 1143 repo.ui.debug(b"switching to full copytracing because of merges\n")
1116 1144 return _fullcopytracing(repo, c1, c2, base)
1117 1145 changedfiles.update(ctx.files())
1118 1146 ctx = ctx.p1()
1119 1147
1120 1148 copies2 = {}
1121 1149 cp = _forwardcopies(base, c2)
1122 1150 for dst, src in pycompat.iteritems(cp):
1123 1151 if src in m1:
1124 1152 copies2[dst] = src
1125 1153
1126 1154 # file is missing if it isn't present in the destination, but is present in
1127 1155 # the base and present in the source.
1128 1156 # Presence in the base is important to exclude added files, presence in the
1129 1157 # source is important to exclude removed files.
1130 1158 filt = lambda f: f not in m1 and f in base and f in c2
1131 1159 missingfiles = [f for f in changedfiles if filt(f)]
1132 1160
1133 1161 copies1 = {}
1134 1162 if missingfiles:
1135 1163 basenametofilename = collections.defaultdict(list)
1136 1164 dirnametofilename = collections.defaultdict(list)
1137 1165
1138 1166 for f in m1.filesnotin(base.manifest()):
1139 1167 basename = os.path.basename(f)
1140 1168 dirname = os.path.dirname(f)
1141 1169 basenametofilename[basename].append(f)
1142 1170 dirnametofilename[dirname].append(f)
1143 1171
1144 1172 for f in missingfiles:
1145 1173 basename = os.path.basename(f)
1146 1174 dirname = os.path.dirname(f)
1147 1175 samebasename = basenametofilename[basename]
1148 1176 samedirname = dirnametofilename[dirname]
1149 1177 movecandidates = samebasename + samedirname
1150 1178 # f is guaranteed to be present in c2, that's why
1151 1179 # c2.filectx(f) won't fail
1152 1180 f2 = c2.filectx(f)
1153 1181 # we can have a lot of candidates, which can slow down the heuristics;
1154 1182 # a config value limits the number of candidate moves to check
1155 1183 maxcandidates = repo.ui.configint(
1156 1184 b'experimental', b'copytrace.movecandidateslimit'
1157 1185 )
1158 1186
1159 1187 if len(movecandidates) > maxcandidates:
1160 1188 repo.ui.status(
1161 1189 _(
1162 1190 b"skipping copytracing for '%s', more "
1163 1191 b"candidates than the limit: %d\n"
1164 1192 )
1165 1193 % (f, len(movecandidates))
1166 1194 )
1167 1195 continue
1168 1196
1169 1197 for candidate in movecandidates:
1170 1198 f1 = c1.filectx(candidate)
1171 1199 if _related(f1, f2):
1172 1200 # if there are a few related copies then we'll merge
1173 1201 # changes into all of them. This matches the behaviour
1174 1202 # of upstream copytracing
1175 1203 copies1[candidate] = f
1176 1204
1177 1205 return branch_copies(copies1), branch_copies(copies2), {}
1178 1206
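A runnable sketch (hypothetical file names) of the candidate gathering performed above: files added on side 1 are bucketed by basename and by dirname, and each missing file is matched against both buckets:

```
import collections
import os

added_in_m1 = [b'newdir/a.txt', b'olddir/b.txt']
basenametofilename = collections.defaultdict(list)
dirnametofilename = collections.defaultdict(list)
for f in added_in_m1:
    basenametofilename[os.path.basename(f)].append(f)
    dirnametofilename[os.path.dirname(f)].append(f)

missing = b'olddir/a.txt'   # moved away on side 1, modified on side 2
movecandidates = (
    basenametofilename[os.path.basename(missing)]
    + dirnametofilename[os.path.dirname(missing)]
)
assert movecandidates == [b'newdir/a.txt', b'olddir/b.txt']
```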
1179 1207
1180 1208 def _related(f1, f2):
1181 1209 """return True if f1 and f2 filectx have a common ancestor
1182 1210
1183 1211 Walk back to common ancestor to see if the two files originate
1184 1212 from the same file. Since workingfilectx's rev() is None it messes
1185 1213 up the integer comparison logic, hence the pre-step check for
1186 1214 None (f1 and f2 can only be workingfilectx's initially).
1187 1215 """
1188 1216
1189 1217 if f1 == f2:
1190 1218 return True # a match
1191 1219
1192 1220 g1, g2 = f1.ancestors(), f2.ancestors()
1193 1221 try:
1194 1222 f1r, f2r = f1.linkrev(), f2.linkrev()
1195 1223
1196 1224 if f1r is None:
1197 1225 f1 = next(g1)
1198 1226 if f2r is None:
1199 1227 f2 = next(g2)
1200 1228
1201 1229 while True:
1202 1230 f1r, f2r = f1.linkrev(), f2.linkrev()
1203 1231 if f1r > f2r:
1204 1232 f1 = next(g1)
1205 1233 elif f2r > f1r:
1206 1234 f2 = next(g2)
1207 1235 else: # f1 and f2 point to files in the same linkrev
1208 1236 return f1 == f2 # true if they point to the same file
1209 1237 except StopIteration:
1210 1238 return False
1211 1239
1212 1240
1213 1241 def graftcopies(wctx, ctx, base):
1214 1242 """reproduce copies between base and ctx in the wctx
1215 1243
1216 1244 Unlike mergecopies(), this function will only consider copies between base
1217 1245 and ctx; it will ignore copies between base and wctx. Also unlike
1218 1246 mergecopies(), this function will apply copies to the working copy (instead
1219 1247 of just returning information about the copies). That makes it cheaper
1220 1248 (especially in the common case of base==ctx.p1()) and useful also when
1221 1249 experimental.copytrace=off.
1222 1250
1223 1251 merge.update() will have already marked most copies, but it will only
1224 1252 mark copies if it thinks the source files are related (see
1225 1253 merge._related()). It will also not mark copies if the file wasn't modified
1226 1254 on the local side. This function adds the copies that were "missed"
1227 1255 by merge.update().
1228 1256 """
1229 1257 new_copies = pathcopies(base, ctx)
1230 1258 parent = wctx.p1()
1231 1259 _filter(parent, wctx, new_copies)
1232 1260 # Extra filtering to drop copy information for files that existed before
1233 1261 # the graft. This is to handle the case of grafting a rename onto a commit
1234 1262 # that already has the rename. Otherwise the presence of copy information
1235 1263 # would result in the creation of an empty commit where we would prefer to
1236 1264 # not create one.
1237 1265 for dest, __ in list(new_copies.items()):
1238 1266 if dest in parent:
1239 1267 del new_copies[dest]
1240 1268 for dst, src in pycompat.iteritems(new_copies):
1241 1269 wctx[dst].markcopied(src)
@@ -1,797 +1,815
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::convert::TryInto;
13 13
14 14 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
15 15
16 16 type PathToken = usize;
17 17
18 18 #[derive(Clone, Debug, PartialEq, Copy)]
19 19 struct TimeStampedPathCopy {
20 20 /// revision at which the copy information was added
21 21 rev: Revision,
22 22 /// the copy source (set to None in case of deletion of the associated
23 23 /// key)
24 24 path: Option<PathToken>,
25 25 }
26 26
27 27 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
28 28 type TimeStampedPathCopies = OrdMap<PathToken, TimeStampedPathCopy>;
29 29
30 30 /// hold parent 1, parent 2 and relevant files actions.
31 31 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
32 32
33 33 /// represent the files affected by a changesets
34 34 ///
35 35 /// This holds a subset of mercurial.metadata.ChangingFiles, as we do not
36 36 /// need all the data categories tracked by it.
39 39 pub struct ChangedFiles<'a> {
40 40 nb_items: u32,
41 41 index: &'a [u8],
42 42 data: &'a [u8],
43 43 }
44 44
45 45 /// Represents active changes that affect the copy tracing.
46 46 enum Action<'a> {
47 47 /// The parent → children edge is removing a file
48 48 ///
49 49 /// (actually, this could be the edge from the other parent, but it does
50 50 /// not matter)
51 51 Removed(&'a HgPath),
52 52 /// The parent → children edge introduces copy information between (dest,
53 53 /// source)
54 54 Copied(&'a HgPath, &'a HgPath),
55 55 }
56 56
57 57 /// This expresses the possible "special" cases we can get in a merge
58 58 ///
59 59 /// See mercurial/metadata.py for details on these values.
60 60 #[derive(PartialEq)]
61 61 enum MergeCase {
62 62 /// Merged: file had history on both sides that needed to be merged
63 63 Merged,
64 64 /// Salvaged: file was candidate for deletion, but survived the merge
65 65 Salvaged,
66 66 /// Normal: Not one of the two cases above
67 67 Normal,
68 68 }
69 69
70 70 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
71 71
72 72 const EMPTY: &[u8] = b"";
73 73 const COPY_MASK: u8 = 3;
74 74 const P1_COPY: u8 = 2;
75 75 const P2_COPY: u8 = 3;
76 76 const ACTION_MASK: u8 = 28;
77 77 const REMOVED: u8 = 12;
78 78 const MERGED: u8 = 8;
79 79 const SALVAGED: u8 = 16;
80 80
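For orientation, a minimal Python sketch (hypothetical data, mirroring the constants above) of the blob layout `ChangedFiles::new()` parses: a big-endian u32 item count, then 9-byte index entries, then the concatenated filename data:

```
import struct

flags = 12                 # REMOVED action, no copy bits set
filename = b'deleted-file'
# one index entry: flags (u8), filename end offset (u32be), copy idx (u32be)
index_entry = struct.pack('>BII', flags, len(filename), 0)
blob = struct.pack('>I', 1) + index_entry + filename
assert len(blob) == 4 + 9 + len(filename)
```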
81 81 impl<'a> ChangedFiles<'a> {
82 82 const INDEX_START: usize = 4;
83 83 const ENTRY_SIZE: u32 = 9;
84 84 const FILENAME_START: u32 = 1;
85 85 const COPY_SOURCE_START: u32 = 5;
86 86
87 87 pub fn new(data: &'a [u8]) -> Self {
88 88 assert!(
89 89 data.len() >= 4,
90 90 "data size ({}) is too small to contain the header (4)",
91 91 data.len()
92 92 );
93 93 let nb_items_raw: [u8; 4] = (&data[0..=3])
94 94 .try_into()
95 95 .expect("failed to turn 4 bytes into 4 bytes");
96 96 let nb_items = u32::from_be_bytes(nb_items_raw);
97 97
98 98 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
99 99 let index_end = Self::INDEX_START + index_size;
100 100
101 101 assert!(
102 102 data.len() >= index_end,
103 103 "data size ({}) is too small to fit the index_data ({})",
104 104 data.len(),
105 105 index_end
106 106 );
107 107
108 108 let ret = ChangedFiles {
109 109 nb_items,
110 110 index: &data[Self::INDEX_START..index_end],
111 111 data: &data[index_end..],
112 112 };
113 113 let max_data = ret.filename_end(nb_items - 1) as usize;
114 114 assert!(
115 115 ret.data.len() >= max_data,
116 116 "data size ({}) is too small to fit all data ({})",
117 117 data.len(),
118 118 index_end + max_data
119 119 );
120 120 ret
121 121 }
122 122
123 123 pub fn new_empty() -> Self {
124 124 ChangedFiles {
125 125 nb_items: 0,
126 126 index: EMPTY,
127 127 data: EMPTY,
128 128 }
129 129 }
130 130
131 131 /// internal function to return an individual entry at a given index
132 132 fn entry(&'a self, idx: u32) -> FileChange<'a> {
133 133 if idx >= self.nb_items {
134 134 panic!(
135 135 "index for entry is higher that the number of file {} >= {}",
136 136 idx, self.nb_items
137 137 )
138 138 }
139 139 let flags = self.flags(idx);
140 140 let filename = self.filename(idx);
141 141 let copy_idx = self.copy_idx(idx);
142 142 let copy_source = self.filename(copy_idx);
143 143 (flags, filename, copy_source)
144 144 }
145 145
146 146 /// internal function to return the filename of the entry at a given index
147 147 fn filename(&self, idx: u32) -> &HgPath {
148 148 let filename_start;
149 149 if idx == 0 {
150 150 filename_start = 0;
151 151 } else {
152 152 filename_start = self.filename_end(idx - 1)
153 153 }
154 154 let filename_end = self.filename_end(idx);
155 155 let filename_start = filename_start as usize;
156 156 let filename_end = filename_end as usize;
157 157 HgPath::new(&self.data[filename_start..filename_end])
158 158 }
159 159
160 160 /// internal function to return the flag field of the entry at a given
161 161 /// index
162 162 fn flags(&self, idx: u32) -> u8 {
163 163 let idx = idx as usize;
164 164 self.index[idx * (Self::ENTRY_SIZE as usize)]
165 165 }
166 166
167 167 /// internal function to return the end of a filename part at a given index
168 168 fn filename_end(&self, idx: u32) -> u32 {
169 169 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
170 170 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
171 171 let start = start as usize;
172 172 let end = end as usize;
173 173 let raw = (&self.index[start..end])
174 174 .try_into()
175 175 .expect("failed to turn 4 bytes into 4 bytes");
176 176 u32::from_be_bytes(raw)
177 177 }
178 178
179 179 /// internal function to return index of the copy source of the entry at a
180 180 /// given index
181 181 fn copy_idx(&self, idx: u32) -> u32 {
182 182 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
183 183 let end = (idx + 1) * Self::ENTRY_SIZE;
184 184 let start = start as usize;
185 185 let end = end as usize;
186 186 let raw = (&self.index[start..end])
187 187 .try_into()
188 188 .expect("failed to turn 4 bytes into 4 bytes");
189 189 u32::from_be_bytes(raw)
190 190 }
191 191
192 192 /// Return an iterator over all the `Action` in this instance.
193 193 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
194 194 ActionsIterator {
195 195 changes: &self,
196 196 parent: parent,
197 197 current: 0,
198 198 }
199 199 }
200 200
201 201 /// return the MergeCase value associated with a filename
202 202 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
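// entries in the index are sorted by filename, so a binary search
// over them finds the matching entry, if any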
203 203 if self.nb_items == 0 {
204 204 return MergeCase::Normal;
205 205 }
206 206 let mut low_part = 0;
207 207 let mut high_part = self.nb_items;
208 208
209 209 while low_part < high_part {
210 210 let cursor = (low_part + high_part - 1) / 2;
211 211 let (flags, filename, _source) = self.entry(cursor);
212 212 match path.cmp(filename) {
213 213 Ordering::Less => low_part = cursor + 1,
214 214 Ordering::Greater => high_part = cursor,
215 215 Ordering::Equal => {
216 216 return match flags & ACTION_MASK {
217 217 MERGED => MergeCase::Merged,
218 218 SALVAGED => MergeCase::Salvaged,
219 219 _ => MergeCase::Normal,
220 220 };
221 221 }
222 222 }
223 223 }
224 224 MergeCase::Normal
225 225 }
226 226 }
227 227
228 228 /// A struct responsible for answering "is X an ancestor of Y" quickly
229 229 ///
230 230 /// The structure will delegate ancestor calls to a callback, and cache the
231 231 /// results.
232 232 #[derive(Debug)]
233 233 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
234 234 inner: &'a A,
235 235 pairs: HashMap<(Revision, Revision), bool>,
236 236 }
237 237
238 238 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
239 239 fn new(func: &'a A) -> Self {
240 240 Self {
241 241 inner: func,
242 242 pairs: HashMap::default(),
243 243 }
244 244 }
245 245
246 246 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
247 247 self.pairs.insert((anc, desc), true);
248 248 }
249 249
250 250 /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
251 251 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
252 252 if anc > desc {
253 253 false
254 254 } else if anc == desc {
255 255 true
256 256 } else {
257 257 if let Some(b) = self.pairs.get(&(anc, desc)) {
258 258 *b
259 259 } else {
260 260 let b = (self.inner)(anc, desc);
261 261 self.pairs.insert((anc, desc), b);
262 262 b
263 263 }
264 264 }
265 265 }
266 266 }
267 267
268 268 struct ActionsIterator<'a> {
269 269 changes: &'a ChangedFiles<'a>,
270 270 parent: Parent,
271 271 current: u32,
272 272 }
273 273
274 274 impl<'a> Iterator for ActionsIterator<'a> {
275 275 type Item = Action<'a>;
276 276
277 277 fn next(&mut self) -> Option<Action<'a>> {
278 278 let copy_flag = match self.parent {
279 279 Parent::FirstParent => P1_COPY,
280 280 Parent::SecondParent => P2_COPY,
281 281 };
282 282 while self.current < self.changes.nb_items {
283 283 let (flags, file, source) = self.changes.entry(self.current);
284 284 self.current += 1;
285 285 if (flags & ACTION_MASK) == REMOVED {
286 286 return Some(Action::Removed(file));
287 287 }
288 288 let copy = flags & COPY_MASK;
289 289 if copy == copy_flag {
290 290 return Some(Action::Copied(file, source));
291 291 }
292 292 }
293 293 return None;
294 294 }
295 295 }
296 296
297 297 /// A small struct whose purpose is to ensure lifetime of bytes referenced in
298 298 /// ChangedFiles
299 299 ///
300 300 /// It is passed to the RevInfoMaker callback who can assign any necessary
301 301 /// content to the `data` attribute. The copy tracing code is responsible for
302 302 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
303 303 pub struct DataHolder<D> {
304 304 /// RevInfoMaker callback should assign data referenced by the
305 305 /// ChangedFiles struct it return to this attribute. The DataHolder
306 306 /// lifetime will be at least as long as the ChangedFiles one.
307 307 pub data: Option<D>,
308 308 }
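
The lifetime contract is easier to see on a self-contained sketch; `Holder` and `fill_and_view` below are local stand-ins, not the real API. The callback parks an owned buffer in the holder it was handed, and the view it returns stays valid because the holder outlives it.

#[allow(dead_code)]
fn holder_demo() {
    struct Holder<D> {
        data: Option<D>,
    }
    fn fill_and_view(holder: &mut Holder<Vec<u8>>) -> &[u8] {
        holder.data = Some(vec![1, 2, 3]); // the callback parks its owned bytes
        holder.data.as_ref().unwrap() // ...and returns a borrow tied to them
    }
    let mut holder = Holder { data: None };
    let view = fill_and_view(&mut holder);
    assert_eq!(view, &[1u8, 2, 3][..]); // valid: `holder` outlives `view`
}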
309 309
310 310 pub type RevInfoMaker<'a, D> =
311 311 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
312 312
313 313 /// enum used to carry information about the parent → child edge being processed
314 314 #[derive(Copy, Clone, Debug)]
315 315 enum Parent {
316 316 /// The `p1(x) β†’ x` edge
317 317 FirstParent,
318 318 /// The `p2(x) β†’ x` edge
319 319 SecondParent,
320 320 }
321 321
322 322 /// A small "tokenizer" responsible for turning full HgPath into lighter
323 323 /// PathToken
324 324 ///
325 325 /// Dealing with small objects like integers is much faster, so HgPath inputs
326 326 /// are turned into integer "PathToken"s and converted back at the end.
327 327 #[derive(Clone, Debug, Default)]
328 328 struct TwoWayPathMap {
329 329 token: HashMap<HgPathBuf, PathToken>,
330 330 path: Vec<HgPathBuf>,
331 331 }
332 332
333 333 impl TwoWayPathMap {
334 334 fn tokenize(&mut self, path: &HgPath) -> PathToken {
335 335 match self.token.get(path) {
336 336 Some(a) => *a,
337 337 None => {
338 338 let a = self.token.len();
339 339 let buf = path.to_owned();
340 340 self.path.push(buf.clone());
341 341 self.token.insert(buf, a);
342 342 a
343 343 }
344 344 }
345 345 }
346 346
347 347 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
348 348 assert!(token < self.path.len(), "Unknown token: {}", token);
349 349 &self.path[token]
350 350 }
351 351 }
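
A short round-trip sketch, assuming only the `HgPath::new` constructor and `as_bytes` accessor that hg-core paths provide: interning the same path twice yields the same token, and `untokenize` gives the path back.

#[allow(dead_code)]
fn path_map_demo() {
    let mut map = TwoWayPathMap::default();
    let a = map.tokenize(HgPath::new(b"dir/file.rs"));
    let b = map.tokenize(HgPath::new(b"dir/file.rs"));
    assert_eq!(a, b); // same path, same token
    assert_eq!(map.untokenize(a).as_bytes(), &b"dir/file.rs"[..]);
}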
352 352
353 353 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
354 354 ///
355 355 /// Arguments are:
356 356 ///
357 357 /// revs: all revisions to be considered
358 358 /// children_count: a {parent → number of children} mapping
359 359 /// target_rev: the final revision we are combining copies to
360 360 /// rev_info(rev): callback to get revision information:
361 361 /// * first parent
362 362 /// * second parent
363 363 /// * ChangedFiles
364 364 /// is_ancestor(low_rev, high_rev): callback to check if a revision is an
365 365 /// ancestor of another
366 366 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
367 367 revs: Vec<Revision>,
368 368 mut children_count: HashMap<Revision, usize>,
369 369 target_rev: Revision,
370 370 rev_info: RevInfoMaker<D>,
371 371 is_ancestor: &A,
372 372 ) -> PathCopies {
373 373 let mut all_copies = HashMap::new();
374 374 let mut oracle = AncestorOracle::new(is_ancestor);
375 375
376 376 let mut path_map = TwoWayPathMap::default();
377 377
378 378 for rev in revs {
379 379 let mut d: DataHolder<D> = DataHolder { data: None };
380 380 let (p1, p2, changes) = rev_info(rev, &mut d);
381 381
382 382 // We will chain the copies information accumulated for the parent with
383 383 // the individual copies information of the current revision, creating a
384 384 // new TimeStampedPathCopies for each `rev` → `children` vertex.
385 385 let mut copies: Option<TimeStampedPathCopies> = None;
386 386 if p1 != NULL_REVISION {
387 387 // Retrieve data computed in a previous iteration
388 388 let parent_copies = get_and_clean_parent_copies(
389 389 &mut all_copies,
390 390 &mut children_count,
391 391 p1,
392 392 );
393 393 if let Some(parent_copies) = parent_copies {
394 394 // combine it with data for that revision
395 395 let vertex_copies = add_from_changes(
396 396 &mut path_map,
397 397 &mut oracle,
398 398 &parent_copies,
399 399 &changes,
400 400 Parent::FirstParent,
401 401 rev,
402 402 );
403 403 // keep that data around for potential later combination
404 404 copies = Some(vertex_copies);
405 405 }
406 406 }
407 407 if p2 != NULL_REVISION {
408 408 // Retrieve data computed in a previous iteration
409 409 let parent_copies = get_and_clean_parent_copies(
410 410 &mut all_copies,
411 411 &mut children_count,
412 412 p2,
413 413 );
414 414 if let Some(parent_copies) = parent_copies {
415 415 // combine it with data for that revision
416 416 let vertex_copies = add_from_changes(
417 417 &mut path_map,
418 418 &mut oracle,
419 419 &parent_copies,
420 420 &changes,
421 421 Parent::SecondParent,
422 422 rev,
423 423 );
424 424
425 425 copies = match copies {
426 426 None => Some(vertex_copies),
427 427 // A merge has two parents, so we need to combine their
428 428 // copy information.
429 429 //
430 430 // If we got data from both parents, we need to combine
431 431 // them.
432 432 Some(copies) => Some(merge_copies_dict(
433 433 &path_map,
434 434 rev,
435 435 vertex_copies,
436 436 copies,
437 437 &changes,
438 438 &mut oracle,
439 439 )),
440 440 };
441 441 }
442 442 }
443 443 match copies {
444 444 Some(copies) => {
445 445 all_copies.insert(rev, copies);
446 446 }
447 447 _ => {}
448 448 }
449 449 }
450 450
451 451 // Drop internal information (like the timestamp) and return the final
452 452 // mapping.
453 453 let tt_result = all_copies
454 454 .remove(&target_rev)
455 455 .expect("target revision was not processed");
456 456 let mut result = PathCopies::default();
457 457 for (dest, tt_source) in tt_result {
458 458 if let Some(path) = tt_source.path {
459 459 let path_dest = path_map.untokenize(dest).to_owned();
460 460 let path_path = path_map.untokenize(path).to_owned();
461 461 result.insert(path_dest, path_path);
462 462 }
463 463 }
464 464 result
465 465 }
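
For context, a plausible sketch of how a caller could build the `children_count` argument; `parents_of` is a hypothetical stand-in for the caller's graph access, not an hg-core function.

#[allow(dead_code)]
fn count_children(
    revs: &[Revision],
    parents_of: impl Fn(Revision) -> (Revision, Revision),
) -> HashMap<Revision, usize> {
    let mut children_count = HashMap::new();
    for &rev in revs {
        let (p1, p2) = parents_of(rev);
        for &p in [p1, p2].iter() {
            if p != NULL_REVISION {
                // each walked revision counts as a child of its parents
                *children_count.entry(p).or_insert(0) += 1;
            }
        }
    }
    children_count
}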
466 466
467 467 /// fetch previously computed information
468 468 ///
469 469 /// If no other children are expected to need this information, we drop it from
470 470 /// the cache.
471 471 ///
472 472 /// If the parent is not part of the set we are expected to walk, return None.
473 473 fn get_and_clean_parent_copies(
474 474 all_copies: &mut HashMap<Revision, TimeStampedPathCopies>,
475 475 children_count: &mut HashMap<Revision, usize>,
476 476 parent_rev: Revision,
477 477 ) -> Option<TimeStampedPathCopies> {
478 478 let count = children_count.get_mut(&parent_rev)?;
479 479 *count -= 1;
480 480 if *count == 0 {
481 481 match all_copies.remove(&parent_rev) {
482 482 Some(c) => Some(c),
483 483 None => Some(TimeStampedPathCopies::default()),
484 484 }
485 485 } else {
486 486 match all_copies.get(&parent_rev) {
487 487 Some(c) => Some(c.clone()),
488 488 None => Some(TimeStampedPathCopies::default()),
489 489 }
490 490 }
491 491 }
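
The eviction rule reads more plainly on std types. A simplified sketch (it returns None where the function above substitutes an empty default): every lookup decrements the parent's pending-children count, and the last consumer moves the value out instead of cloning it.

#[allow(dead_code)]
fn take_or_clone(
    cache: &mut HashMap<Revision, Vec<u8>>,
    remaining: &mut HashMap<Revision, usize>,
    parent: Revision,
) -> Option<Vec<u8>> {
    let count = remaining.get_mut(&parent)?;
    *count -= 1;
    if *count == 0 {
        cache.remove(&parent) // last consumer: move the value out
    } else {
        cache.get(&parent).cloned() // more consumers coming: clone instead
    }
}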
492 492
493 493 /// Combine ChangedFiles with some existing PathCopies information and return
494 494 /// the result
495 495 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
496 496 path_map: &mut TwoWayPathMap,
497 497 oracle: &mut AncestorOracle<A>,
498 498 base_copies: &TimeStampedPathCopies,
499 499 changes: &ChangedFiles,
500 500 parent: Parent,
501 501 current_rev: Revision,
502 502 ) -> TimeStampedPathCopies {
503 503 let mut copies = base_copies.clone();
504 504 for action in changes.iter_actions(parent) {
505 505 match action {
506 506 Action::Copied(path_dest, path_source) => {
507 507 let dest = path_map.tokenize(path_dest);
508 508 let source = path_map.tokenize(path_source);
509 509 let entry;
510 510 if let Some(v) = base_copies.get(&source) {
511 511 entry = match &v.path {
512 512 Some(path) => Some((*(path)).to_owned()),
513 513 None => Some(source.to_owned()),
514 514 }
515 515 } else {
516 516 entry = Some(source.to_owned());
517 517 }
518 518 // Each new entry is introduced by the child revision; we
519 519 // record this information as we will need it to make
520 520 // the right decision when merging conflicting copy
521 521 // information. See merge_copies_dict for details.
522 522 match copies.entry(dest) {
523 523 Entry::Vacant(slot) => {
524 524 let ttpc = TimeStampedPathCopy {
525 525 rev: current_rev,
526 526 path: entry,
527 527 };
528 528 slot.insert(ttpc);
529 529 }
530 530 Entry::Occupied(mut slot) => {
531 531 let ttpc = slot.get_mut();
532 532 oracle.record_overwrite(ttpc.rev, current_rev);
533 533 ttpc.rev = current_rev;
534 534 ttpc.path = entry;
535 535 }
536 536 }
537 537 }
538 538 Action::Removed(deleted_path) => {
539 539 // We must drop copy information for removed files.
540 540 //
541 541 // We need to explicitly record them as dropped to
542 542 // propagate this information when merging two
543 543 // TimeStampedPathCopies objects.
544 544 let deleted = path_map.tokenize(deleted_path);
545 545 copies.entry(deleted).and_modify(|old| {
546 546 oracle.record_overwrite(old.rev, current_rev);
547 547 old.rev = current_rev;
548 548 old.path = None;
549 549 });
550 550 }
551 551 }
552 552 }
553 553 copies
554 554 }
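
The two kinds of actions compose as follows on plain std types (a sketch of the bookkeeping only, not the real tokenized structures): a copy chains through any existing source, and a removal leaves an explicit tombstone rather than silently disappearing.

#[allow(dead_code)]
fn fold_demo() {
    let mut copies: HashMap<&str, Option<&str>> = HashMap::new();
    copies.insert("b", Some("a")); // "b" was copied from "a"
    // "c" was copied from "b": chain through to the original source "a"
    let src = copies.get("b").copied().flatten().unwrap_or("b");
    copies.insert("c", Some(src));
    copies.insert("b", None); // "b" was removed: keep an explicit tombstone
    assert_eq!(copies.get("c"), Some(&Some("a")));
    assert_eq!(copies.get("b"), Some(&None));
}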
555 555
556 556 /// merge two copy mappings together, minor and major
557 557 ///
558 558 /// In case of conflict, the value from "major" will be picked, except in
559 559 /// some cases. See inline documentation for details.
560 560 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
561 561 path_map: &TwoWayPathMap,
562 562 current_merge: Revision,
563 563 mut minor: TimeStampedPathCopies,
564 564 mut major: TimeStampedPathCopies,
565 565 changes: &ChangedFiles,
566 566 oracle: &mut AncestorOracle<A>,
567 567 ) -> TimeStampedPathCopies {
568 568 // This closure exists as a temporary helper while multiple developers are
569 569 // actively working on this code. Feel free to re-inline it once this
570 570 // code is more settled.
571 571 let mut cmp_value =
572 572 |dest: &PathToken,
573 573 src_minor: &TimeStampedPathCopy,
574 574 src_major: &TimeStampedPathCopy| {
575 575 compare_value(
576 576 path_map,
577 577 current_merge,
578 578 changes,
579 579 oracle,
580 580 dest,
581 581 src_minor,
582 582 src_major,
583 583 )
584 584 };
585 585 if minor.is_empty() {
586 586 major
587 587 } else if major.is_empty() {
588 588 minor
589 589 } else if minor.len() * 2 < major.len() {
590 590 // Let's say we are merging two TimeStampedPathCopies instances A and B.
591 591 //
592 592 // If A contains N items, the merge result will never contain more
593 593 // than N values different from the ones in A.
594 594 //
595 595 // If B contains M items, with M > N, the merge result will always
596 596 // contain a minimum of M - N values different from the ones in
597 597 // A.
598 598 //
599 599 // As a result, if N < (M - N), we know that simply iterating over A will
600 600 // yield fewer differences than iterating over the difference
601 601 // between A and B.
602 602 //
603 603 // This helps performance a lot in cases where a tiny
604 604 // TimeStampedPathCopies is merged with a much larger one.
605 605 for (dest, src_minor) in minor {
606 606 let src_major = major.get(&dest);
607 607 match src_major {
608 608 None => major.insert(dest, src_minor),
609 609 Some(src_major) => {
610 610 match cmp_value(&dest, &src_minor, src_major) {
611 611 MergePick::Any | MergePick::Major => None,
612 612 MergePick::Minor => major.insert(dest, src_minor),
613 613 }
614 614 }
615 615 };
616 616 }
617 617 major
618 618 } else if major.len() * 2 < minor.len() {
619 619 // This uses the same rationale as the previous block.
620 620 // (Check previous block documentation for details.)
621 621 for (dest, src_major) in major {
622 622 let src_minor = minor.get(&dest);
623 623 match src_minor {
624 624 None => minor.insert(dest, src_major),
625 625 Some(src_minor) => {
626 626 match cmp_value(&dest, src_minor, &src_major) {
627 627 MergePick::Any | MergePick::Minor => None,
628 628 MergePick::Major => minor.insert(dest, src_major),
629 629 }
630 630 }
631 631 };
632 632 }
633 633 minor
634 634 } else {
635 635 let mut override_minor = Vec::new();
636 636 let mut override_major = Vec::new();
637 637
638 638 let mut to_major = |k: &PathToken, v: &TimeStampedPathCopy| {
639 639 override_major.push((k.clone(), v.clone()))
640 640 };
641 641 let mut to_minor = |k: &PathToken, v: &TimeStampedPathCopy| {
642 642 override_minor.push((k.clone(), v.clone()))
643 643 };
644 644
645 645 // The diff function leverages detection of identical subparts when
646 646 // minor and major have some common ancestors. This makes it very
647 647 // fast in most cases.
648 648 //
649 649 // In cases where the two maps are vastly different in size, the current
650 650 // approach is still slowish because the iteration will go over
651 651 // all the "exclusive" content of the larger one. This situation can be
652 652 // frequent when the subgraph of revisions we are processing has a lot
653 653 // of roots. Each root adds its own fully new map to the mix (and
654 654 // likely a small map, if the path from the root to the "main path" is
655 655 // short).
656 656 //
657 657 // We could do better by detecting such situations and processing them
658 658 // differently.
659 659 for d in minor.diff(&major) {
660 660 match d {
661 661 DiffItem::Add(k, v) => to_minor(k, v),
662 662 DiffItem::Remove(k, v) => to_major(k, v),
663 663 DiffItem::Update { old, new } => {
664 664 let (dest, src_major) = new;
665 665 let (_, src_minor) = old;
666 666 match cmp_value(dest, src_minor, src_major) {
667 667 MergePick::Major => to_minor(dest, src_major),
668 668 MergePick::Minor => to_major(dest, src_minor),
669 669 // If the two entries are identical, there is no need
670 670 // to do anything (but diff should not have yielded them)
671 671 MergePick::Any => unreachable!(),
672 672 }
673 673 }
674 674 };
675 675 }
676 676
677 677 let updates;
678 678 let mut result;
679 679 if override_major.is_empty() {
680 680 result = major
681 681 } else if override_minor.is_empty() {
682 682 result = minor
683 683 } else {
684 684 if override_minor.len() < override_major.len() {
685 685 updates = override_minor;
686 686 result = minor;
687 687 } else {
688 688 updates = override_major;
689 689 result = major;
690 690 }
691 691 for (k, v) in updates {
692 692 result.insert(k, v);
693 693 }
694 694 }
695 695 result
696 696 }
697 697 }
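
The size heuristic above boils down to the following shape (a sketch on std maps; the real conflict policy lives in compare_value and is elided here): iterating only over the small side bounds the work by the smaller map, whatever the size of the larger one.

#[allow(dead_code)]
fn merge_small_into_big(
    mut big: HashMap<Revision, Revision>,
    small: HashMap<Revision, Revision>,
) -> HashMap<Revision, Revision> {
    // Only `small.len()` entries can differ from `big`, so `small.len()`
    // insertions bound the work regardless of `big.len()`.
    for (k, v) in small {
        big.entry(k).or_insert(v); // on conflict keep `big` ("major" wins)
    }
    big
}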
698 698
699 699 /// represents the side that should prevail when merging two
700 700 /// TimeStampedPathCopies
701 701 enum MergePick {
702 702 /// The "major" (p1) side prevails
703 703 Major,
704 704 /// The "minor" (p2) side prevails
705 705 Minor,
706 706 /// Any side could be used (because they are the same)
707 707 Any,
708 708 }
709 709
710 710 /// decide which side prevails in case of conflicting values
711 711 #[allow(clippy::if_same_then_else)]
712 712 fn compare_value<A: Fn(Revision, Revision) -> bool>(
713 713 path_map: &TwoWayPathMap,
714 714 current_merge: Revision,
715 715 changes: &ChangedFiles,
716 716 oracle: &mut AncestorOracle<A>,
717 717 dest: &PathToken,
718 718 src_minor: &TimeStampedPathCopy,
719 719 src_major: &TimeStampedPathCopy,
720 720 ) -> MergePick {
721 721 if src_major.rev == current_merge {
722 722 if src_minor.rev == current_merge {
723 723 if src_major.path.is_none() {
724 724 // We cannot get different copy information for both p1 and p2
725 725 // from the same revision, unless this was a
726 726 // deletion.
727 727 MergePick::Any
728 728 } else {
729 729 unreachable!();
730 730 }
731 731 } else {
732 732 // The last value comes from the current merge; this value -will- win
733 733 // eventually.
734 734 oracle.record_overwrite(src_minor.rev, src_major.rev);
735 735 MergePick::Major
736 736 }
737 737 } else if src_minor.rev == current_merge {
738 738 // The last value comes from the current merge; this value -will- win
739 739 // eventually.
740 740 oracle.record_overwrite(src_major.rev, src_minor.rev);
741 741 MergePick::Minor
742 742 } else if src_major.path == src_minor.path {
743 743 // we have the same value, but from another source;
744 744 if src_major.rev == src_minor.rev {
745 745 // If the two entries are identical, they are both valid
746 746 MergePick::Any
747 747 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
748 748 MergePick::Minor
749 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
750 MergePick::Major
749 751 } else {
750 752 MergePick::Major
751 753 }
752 754 } else if src_major.rev == src_minor.rev {
753 755 // We cannot get copy information for both p1 and p2 in the
754 756 // same rev. So this is the same value.
755 757 unreachable!(
756 "conflict information from p1 and p2 in the same revision"
758 "conflicting information from p1 and p2 in the same revision"
757 759 );
758 760 } else {
759 761 let dest_path = path_map.untokenize(*dest);
760 762 let action = changes.get_merge_case(dest_path);
761 if src_major.path.is_none() && action == MergeCase::Salvaged {
763 if src_minor.path.is_some()
764 && src_major.path.is_none()
765 && action == MergeCase::Salvaged
766 {
762 767 // If the file is "deleted" on the major side but was
763 768 // salvaged by the merge, we keep the minor side alive
764 769 MergePick::Minor
765 } else if src_minor.path.is_none() && action == MergeCase::Salvaged {
770 } else if src_major.path.is_some()
771 && src_minor.path.is_none()
772 && action == MergeCase::Salvaged
773 {
766 774 // If the file is "deleted" on the minor side but was
767 775 // salvaged by the merge, unconditionally preserve the
768 776 // major side.
769 777 MergePick::Major
770 } else if action == MergeCase::Merged {
771 // If the file was actively merged, copy information
772 // from each side might conflict. The major side will
773 // win such conflict.
774 MergePick::Major
775 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
776 // If the minor side is strictly newer than the major
777 // side, it should be kept.
778 MergePick::Minor
779 } else if src_major.path.is_some() {
780 // without any special case, the "major" value win
781 // other the "minor" one.
782 MergePick::Major
783 778 } else if oracle.is_overwrite(src_minor.rev, src_major.rev) {
784 // the "major" rev is a direct ancestors of "minor",
785 // any different value should
786 // overwrite
779 // The information from the minor version is strictly older than
780 // the major version.
781 if action == MergeCase::Merged {
782 // If the file was actively merged, it means some non-copy
783 // activity happened on the other branch, which
784 // means the older copy information is still relevant.
785 //
786 // The major side wins such conflict.
787 787 MergePick::Major
788 788 } else {
789 // major version is None (so the file was deleted on
790 // that branch) and that branch is independant (neither
791 // minor nor major is an ancestors of the other one.)
792 // We preserve the new
793 // information about the new file.
789 // No merge activity, pick the newer (major) one.
790 MergePick::Major
791 }
792 } else if oracle.is_overwrite(src_major.rev, src_minor.rev) {
793 if action == MergeCase::Merged {
794 // If the file was actively merged, it means some non-copy
795 // activity happened on the other branch, which
796 // means the older copy information is still relevant.
797 //
798 // The major side wins such conflict.
799 MergePick::Major
800 } else {
801 // No merge activity, pick the newer (minor) one.
794 802 MergePick::Minor
795 803 }
804 } else if src_minor.path.is_none() {
805 // the minor side has no relevant information, pick the alive one
806 MergePick::Major
807 } else if src_major.path.is_none() {
808 // the major side has no relevant information, pick the alive one
809 MergePick::Minor
810 } else {
811 // by default the major side wins
812 MergePick::Major
813 }
796 814 }
797 815 }