copies-rust: remove the ancestor Oracle logic...
marmoute -
r47316:34827c95 default
@@ -1,1281 +1,1281 b''
1 1 # coding: utf8
2 2 # copies.py - copy detection for Mercurial
3 3 #
4 4 # Copyright 2008 Matt Mackall <mpm@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import collections
12 12 import os
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 nullid,
17 17 nullrev,
18 18 )
19 19
20 20 from . import (
21 21 match as matchmod,
22 22 pathutil,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 )
27 27
28 28
29 29 from .utils import stringutil
30 30
31 31 from .revlogutils import (
32 32 flagutil,
33 33 sidedata as sidedatamod,
34 34 )
35 35
36 36 rustmod = policy.importrust("copy_tracing")
37 37
38 38
39 39 def _filter(src, dst, t):
40 40 """filters out invalid copies after chaining"""
41 41
42 42 # When _chain()'ing copies in 'a' (from 'src' via some other commit 'mid')
43 43 # with copies in 'b' (from 'mid' to 'dst'), we can get the different cases
44 44 # in the following table (not including trivial cases). For example, case 6
45 45 # is where a file existed in 'src' and remained under that name in 'mid' and
46 46 # then was renamed between 'mid' and 'dst'.
47 47 #
48 48 # case src mid dst result
49 49 # 1 x y - -
50 50 # 2 x y y x->y
51 51 # 3 x y x -
52 52 # 4 x y z x->z
53 53 # 5 - x y -
54 54 # 6 x x y x->y
55 55 #
56 56 # _chain() takes care of chaining the copies in 'a' and 'b', but it
57 57 # cannot tell the difference between cases 1 and 2, between 3 and 4, or
58 58 # between 5 and 6, so it includes all cases in its result.
59 59 # Cases 1, 3, and 5 are then removed by _filter().
60 60
61 61 for k, v in list(t.items()):
62 62 if k == v: # case 3
63 63 del t[k]
64 64 elif v not in src: # case 5
65 65 # remove copies from files that didn't exist
66 66 del t[k]
67 67 elif k not in dst: # case 1
68 68 # remove copies to files that were then removed
69 69 del t[k]
70 70
71 71
72 72 def _chain(prefix, suffix):
73 73 """chain two sets of copies 'prefix' and 'suffix'"""
74 74 result = prefix.copy()
75 75 for key, value in pycompat.iteritems(suffix):
76 76 result[key] = prefix.get(value, value)
77 77 return result
78 78
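# Editor's sketch (hypothetical data, not part of this changeset) showing how
# _chain() and _filter() compose: 'b' is copied from 'a' between src and mid,
# then renamed to 'c' between mid and dst.
def _chain_and_filter_example():
    prefix = {b'b': b'a'}  # copies from src to mid
    suffix = {b'c': b'b'}  # copies from mid to dst
    t = _chain(prefix, suffix)
    assert t == {b'b': b'a', b'c': b'a'}
    # 'b' no longer exists in dst, so case 1 of the table above removes it
    _filter({b'a'}, {b'c'}, t)
    assert t == {b'c': b'a'}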
79 79
80 80 def _tracefile(fctx, am, basemf):
81 81 """return file context that is the ancestor of fctx present in ancestor
82 82 manifest am
83 83
84 84 Note: we used to try and stop after a given limit, however checking if that
85 85 limit is reached turned out to be very expensive. We are better off
86 86 disabling that feature."""
87 87
88 88 for f in fctx.ancestors():
89 89 path = f.path()
90 90 if am.get(path, None) == f.filenode():
91 91 return path
92 92 if basemf and basemf.get(path, None) == f.filenode():
93 93 return path
94 94
95 95
96 96 def _dirstatecopies(repo, match=None):
97 97 ds = repo.dirstate
98 98 c = ds.copies().copy()
99 99 for k in list(c):
100 100 if ds[k] not in b'anm' or (match and not match(k)):
101 101 del c[k]
102 102 return c
103 103
104 104
105 105 def _computeforwardmissing(a, b, match=None):
106 106 """Computes which files are in b but not a.
107 107 This is its own function so extensions can easily wrap this call to see what
108 108 files _forwardcopies is about to process.
109 109 """
110 110 ma = a.manifest()
111 111 mb = b.manifest()
112 112 return mb.filesnotin(ma, match=match)
113 113
114 114
115 115 def usechangesetcentricalgo(repo):
116 116 """Checks if we should use changeset-centric copy algorithms"""
117 117 if repo.filecopiesmode == b'changeset-sidedata':
118 118 return True
119 119 readfrom = repo.ui.config(b'experimental', b'copies.read-from')
120 120 changesetsource = (b'changeset-only', b'compatibility')
121 121 return readfrom in changesetsource
122 122
123 123
124 124 def _committedforwardcopies(a, b, base, match):
125 125 """Like _forwardcopies(), but b.rev() cannot be None (working copy)"""
126 126 # files might have to be traced back to the fctx parent of the last
127 127 # one-side-only changeset, but not further back than that
128 128 repo = a._repo
129 129
130 130 if usechangesetcentricalgo(repo):
131 131 return _changesetforwardcopies(a, b, match)
132 132
133 133 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
134 134 dbg = repo.ui.debug
135 135 if debug:
136 136 dbg(b'debug.copies: looking into rename from %s to %s\n' % (a, b))
137 137 am = a.manifest()
138 138 basemf = None if base is None else base.manifest()
139 139
140 140 # find where new files came from
141 141 # we currently don't try to find where old files went, too expensive
142 142 # this means we can miss a case like 'hg rm b; hg cp a b'
143 143 cm = {}
144 144
145 145 # Computing the forward missing is quite expensive on large manifests, since
146 146 # it compares the entire manifests. We can optimize it in the common use
147 147 # case of computing what copies are in a commit versus its parent (like
148 148 # during a rebase or histedit). Note, we exclude merge commits from this
149 149 # optimization, since the ctx.files() for a merge commit is not correct for
150 150 # this comparison.
151 151 forwardmissingmatch = match
152 152 if b.p1() == a and b.p2().node() == nullid:
153 153 filesmatcher = matchmod.exact(b.files())
154 154 forwardmissingmatch = matchmod.intersectmatchers(match, filesmatcher)
155 155 if repo.ui.configbool(b'devel', b'copy-tracing.trace-all-files'):
156 156 missing = list(b.walk(match))
157 157 # _computeforwardmissing(a, b, match=forwardmissingmatch)
158 158 if debug:
159 159 dbg(b'debug.copies: searching all files: %d\n' % len(missing))
160 160 else:
161 161 missing = _computeforwardmissing(a, b, match=forwardmissingmatch)
162 162 if debug:
163 163 dbg(
164 164 b'debug.copies: missing files to search: %d\n'
165 165 % len(missing)
166 166 )
167 167
168 168 ancestrycontext = a._repo.changelog.ancestors([b.rev()], inclusive=True)
169 169
170 170 for f in sorted(missing):
171 171 if debug:
172 172 dbg(b'debug.copies: tracing file: %s\n' % f)
173 173 fctx = b[f]
174 174 fctx._ancestrycontext = ancestrycontext
175 175
176 176 if debug:
177 177 start = util.timer()
178 178 opath = _tracefile(fctx, am, basemf)
179 179 if opath:
180 180 if debug:
181 181 dbg(b'debug.copies: rename of: %s\n' % opath)
182 182 cm[f] = opath
183 183 if debug:
184 184 dbg(
185 185 b'debug.copies: time: %f seconds\n'
186 186 % (util.timer() - start)
187 187 )
188 188 return cm
189 189
190 190
191 191 def _revinfo_getter(repo, match):
192 192 """returns a function that returns the following data given a <rev>"
193 193
194 194 * p1: revision number of first parent
195 195 * p2: revision number of first parent
196 196 * changes: a ChangingFiles object
197 197 """
198 198 cl = repo.changelog
199 199 parents = cl.parentrevs
200 200 flags = cl.flags
201 201
202 202 HASCOPIESINFO = flagutil.REVIDX_HASCOPIESINFO
203 203
204 204 changelogrevision = cl.changelogrevision
205 205
206 206 if rustmod is not None:
207 207
208 208 def revinfo(rev):
209 209 p1, p2 = parents(rev)
210 210 if flags(rev) & HASCOPIESINFO:
211 211 raw = changelogrevision(rev)._sidedata.get(sidedatamod.SD_FILES)
212 212 else:
213 213 raw = None
214 214 return (p1, p2, raw)
215 215
216 216 else:
217 217
218 218 def revinfo(rev):
219 219 p1, p2 = parents(rev)
220 220 if flags(rev) & HASCOPIESINFO:
221 221 changes = changelogrevision(rev).changes
222 222 else:
223 223 changes = None
224 224 return (p1, p2, changes)
225 225
226 226 return revinfo
227 227
228 228
229 229 def cached_is_ancestor(is_ancestor):
230 230 """return a cached version of is_ancestor"""
231 231 cache = {}
232 232
233 233 def _is_ancestor(anc, desc):
234 234 if anc > desc:
235 235 return False
236 236 elif anc == desc:
237 237 return True
238 238 key = (anc, desc)
239 239 ret = cache.get(key)
240 240 if ret is None:
241 241 ret = cache[key] = is_ancestor(anc, desc)
242 242 return ret
243 243
244 244 return _is_ancestor
245 245
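# Editor's sketch (illustrative only): the cache wraps an arbitrary
# is_ancestor callable, and the cheap rev-number checks above never reach it.
def _cached_is_ancestor_example():
    calls = []

    def slow_is_ancestor(anc, desc):
        calls.append((anc, desc))
        return True  # pretend every queried pair is related

    fast = cached_is_ancestor(slow_is_ancestor)
    assert fast(5, 3) is False  # anc > desc is answered without a lookup
    assert fast(3, 5) and fast(3, 5)  # second call is served from the cache
    assert calls == [(3, 5)]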
246 246
247 247 def _changesetforwardcopies(a, b, match):
248 248 if a.rev() in (nullrev, b.rev()):
249 249 return {}
250 250
251 251 repo = a.repo().unfiltered()
252 252 children = {}
253 253
254 254 cl = repo.changelog
255 255 isancestor = cl.isancestorrev
256 256
257 257 # To track a rename from "A" to "B", we need to gather all parent → child
258 258 # edges that are contained in `::B` but not in `::A`.
259 259 #
260 260 #
261 261 # To do so, we need to gather all revisions exclusive¹ to "B" (ie¹: `::b -
262 262 # ::a`) and also all the "root points", ie the parents of the exclusive set
263 263 # that belong to ::a. These are exactly all the revisions needed to express
264 264 # the parent → child edges we need to combine.
265 265 #
266 266 # [1] actually, we need to gather all the edges within `(::a)::b`, ie:
267 267 # excluding paths that lead to roots that are not ancestors of `a`. We
268 268 # keep this out of the explanation because it is hard enough without this special case.
269 269
270 270 parents = cl._uncheckedparentrevs
271 271 graph_roots = (nullrev, nullrev)
272 272
273 273 ancestors = cl.ancestors([a.rev()], inclusive=True)
274 274 revs = cl.findmissingrevs(common=[a.rev()], heads=[b.rev()])
275 275 roots = set()
276 276 has_graph_roots = False
277 277
278 278 # iterate over `only(B, A)`
279 279 for r in revs:
280 280 ps = parents(r)
281 281 if ps == graph_roots:
282 282 has_graph_roots = True
283 283 else:
284 284 p1, p2 = ps
285 285
286 286 # find all the "root points" (see larger comment above)
287 287 if p1 != nullrev and p1 in ancestors:
288 288 roots.add(p1)
289 289 if p2 != nullrev and p2 in ancestors:
290 290 roots.add(p2)
291 291 if not roots:
292 292 # no common revision to track copies from
293 293 return {}
294 294 if has_graph_roots:
295 295 # this deals with the special case mentioned in the [1] footnote. We
296 296 # must filter out revisions that lead to non-common graph roots.
297 297 roots = list(roots)
298 298 m = min(roots)
299 299 h = [b.rev()]
300 300 roots_to_head = cl.reachableroots(m, h, roots, includepath=True)
301 301 roots_to_head = set(roots_to_head)
302 302 revs = [r for r in revs if r in roots_to_head]
303 303
304 304 if repo.filecopiesmode == b'changeset-sidedata':
305 305 # When using side-data, we will process the edges "from" the children.
306 306 # We iterate over the children, gathering previously collected data
307 307 # for the parents. To know when the parent data is no longer necessary,
308 308 # we keep a counter of how many children each revision has.
309 309 #
310 310 # An interesting property of `children_count` is that it only contains
311 311 # revisions that will be relevant for an edge of the graph. So if a
312 312 # child has a parent not in `children_count`, that edge should not be
313 313 # processed.
314 314 children_count = dict((r, 0) for r in roots)
315 315 for r in revs:
316 316 for p in cl.parentrevs(r):
317 317 if p == nullrev:
318 318 continue
319 319 children_count[r] = 0
320 320 if p in children_count:
321 321 children_count[p] += 1
322 322 revinfo = _revinfo_getter(repo, match)
323 323 return _combine_changeset_copies(
324 324 revs, children_count, b.rev(), revinfo, match, isancestor
325 325 )
326 326 else:
327 327 # When not using side-data, we will process the edges "from" the parent.
328 328 # so we need a full mapping of the parent -> children relation.
329 329 children = dict((r, []) for r in roots)
330 330 for r in revs:
331 331 for p in cl.parentrevs(r):
332 332 if p == nullrev:
333 333 continue
334 334 children[r] = []
335 335 if p in children:
336 336 children[p].append(r)
337 337 x = revs.pop()
338 338 assert x == b.rev()
339 339 revs.extend(roots)
340 340 revs.sort()
341 341
342 342 revinfo = _revinfo_getter_extra(repo)
343 343 return _combine_changeset_copies_extra(
344 344 revs, children, b.rev(), revinfo, match, isancestor
345 345 )
346 346
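# Editor's sketch (toy graph, not part of this changeset) of the
# `children_count` bookkeeping used by the sidedata branch above: rev 0 is
# the root, revs 1 and 2 are its children, and rev 3 merges 1 and 2.
def _children_count_example():
    parentrevs = {1: (0, -1), 2: (0, -1), 3: (1, 2)}  # -1 stands for nullrev
    roots, revs = {0}, [1, 2, 3]
    children_count = dict((r, 0) for r in roots)
    for r in revs:
        for p in parentrevs[r]:
            if p == -1:
                continue
            children_count[r] = 0
            if p in children_count:
                children_count[p] += 1
    # rev 0 feeds two edges, revs 1 and 2 one edge each, rev 3 none
    assert children_count == {0: 2, 1: 1, 2: 1, 3: 0}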
347 347
348 348 def _combine_changeset_copies(
349 349 revs, children_count, targetrev, revinfo, match, isancestor
350 350 ):
351 351 """combine the copies information for each item of iterrevs
352 352
353 353 revs: sorted iterable of revisions to visit
354 354 children_count: a {parent: <number-of-relevant-children>} mapping.
355 355 targetrev: the final copies destination revision (not in iterrevs)
356 356 revinfo(rev): a function that returns (p1, p2, changes)
357 357 match: a matcher
358 358
359 359 It returns the aggregated copies information for `targetrev`.
360 360 """
361 361
362 362 alwaysmatch = match.always()
363 363
364 364 if rustmod is not None:
365 365 final_copies = rustmod.combine_changeset_copies(
366 list(revs), children_count, targetrev, revinfo, isancestor
366 list(revs), children_count, targetrev, revinfo
367 367 )
368 368 else:
369 369 isancestor = cached_is_ancestor(isancestor)
370 370
371 371 all_copies = {}
372 372 # iterate over all the "children" side of copy tracing "edge"
373 373 for current_rev in revs:
374 374 p1, p2, changes = revinfo(current_rev)
375 375 current_copies = None
376 376 # iterate over all parents to chain the existing data with the
377 377 # data from the parent → child edge.
378 378 for parent, parent_rev in ((1, p1), (2, p2)):
379 379 if parent_rev == nullrev:
380 380 continue
381 381 remaining_children = children_count.get(parent_rev)
382 382 if remaining_children is None:
383 383 continue
384 384 remaining_children -= 1
385 385 children_count[parent_rev] = remaining_children
386 386 if remaining_children:
387 387 copies = all_copies.get(parent_rev, None)
388 388 else:
389 389 copies = all_copies.pop(parent_rev, None)
390 390
391 391 if copies is None:
392 392 # this is a root
393 393 newcopies = copies = {}
394 394 elif remaining_children:
395 395 newcopies = copies.copy()
396 396 else:
397 397 newcopies = copies
398 398 # chain the data in the edge with the existing data
399 399 if changes is not None:
400 400 childcopies = {}
401 401 if parent == 1:
402 402 childcopies = changes.copied_from_p1
403 403 elif parent == 2:
404 404 childcopies = changes.copied_from_p2
405 405
406 406 if childcopies:
407 407 newcopies = copies.copy()
408 408 for dest, source in pycompat.iteritems(childcopies):
409 409 prev = copies.get(source)
410 410 if prev is not None and prev[1] is not None:
411 411 source = prev[1]
412 412 newcopies[dest] = (current_rev, source)
413 413 assert newcopies is not copies
414 414 if changes.removed:
415 415 for f in changes.removed:
416 416 if f in newcopies:
417 417 if newcopies is copies:
418 418 # copy on write to avoid affecting potential other
419 419 # branches. when there are no other branches, this
420 420 # could be avoided.
421 421 newcopies = copies.copy()
422 422 newcopies[f] = (current_rev, None)
423 423 # check potential need to combine the data from another parent (for
424 424 # that child). See comment below for details.
425 425 if current_copies is None:
426 426 current_copies = newcopies
427 427 else:
428 428 # we are the second parent to work on c, we need to merge our
429 429 # work with the other.
430 430 #
431 431 # In case of conflict, parent 1 takes precedence over parent 2.
432 432 # This is an arbitrary choice made anew when implementing
433 433 # changeset based copies. It was made without regard to
434 434 # potential filelog-related behavior.
435 435 assert parent == 2
436 436 current_copies = _merge_copies_dict(
437 437 newcopies,
438 438 current_copies,
439 439 isancestor,
440 440 changes,
441 441 current_rev,
442 442 )
443 443 all_copies[current_rev] = current_copies
444 444
445 445 # filter out internal details and return a {dest: source mapping}
446 446 final_copies = {}
447 447 for dest, (tt, source) in all_copies[targetrev].items():
448 448 if source is not None:
449 449 final_copies[dest] = source
450 450 if not alwaysmatch:
451 451 for filename in list(final_copies.keys()):
452 452 if not match(filename):
453 453 del final_copies[filename]
454 454 return final_copies
455 455
456 456
457 457 # constant to decide which side to pick with _merge_copies_dict
458 458 PICK_MINOR = 0
459 459 PICK_MAJOR = 1
460 460 PICK_EITHER = 2
461 461
462 462
463 463 def _merge_copies_dict(minor, major, isancestor, changes, current_merge):
464 464 """merge two copies-mapping together, minor and major
465 465
466 466 In case of conflict, value from "major" will be picked.
467 467
468 468 - `isancestors(low_rev, high_rev)`: callable return True if `low_rev` is an
469 469 ancestors of `high_rev`,
470 470
471 471 - `ismerged(path)`: callable return True if `path` have been merged in the
472 472 current revision,
473 473
474 474 return the resulting dict (in practice, the "minor" object, updated)
475 475 """
476 476 for dest, value in major.items():
477 477 other = minor.get(dest)
478 478 if other is None:
479 479 minor[dest] = value
480 480 else:
481 481 pick, overwrite = _compare_values(
482 482 changes, isancestor, dest, other, value
483 483 )
484 484 if overwrite:
485 485 if pick == PICK_MAJOR:
486 486 minor[dest] = (current_merge, value[1])
487 487 else:
488 488 minor[dest] = (current_merge, other[1])
489 489 elif pick == PICK_MAJOR:
490 490 minor[dest] = value
491 491 return minor
492 492
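# Editor's sketch (hypothetical revisions): merging two copy mappings whose
# values are the (rev, source) pairs built by _combine_changeset_copies().
def _merge_copies_dict_example():
    is_ancestor = lambda anc, desc: anc < desc  # toy linear history
    minor = {b'dst': (1, b'old')}
    major = {b'dst': (2, b'new')}
    merged = _merge_copies_dict(minor, major, is_ancestor, None, 3)
    # minor's rev 1 is an ancestor of major's rev 2, so "major" wins
    assert merged == {b'dst': (2, b'new')}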
493 493
494 494 def _compare_values(changes, isancestor, dest, minor, major):
495 495 """compare two value within a _merge_copies_dict loop iteration
496 496
497 497 return (pick, overwrite).
498 498
499 499 - pick is one of PICK_MINOR, PICK_MAJOR or PICK_EITHER
500 500 - overwrite is True when the pick resolves an ambiguity; the winner then overwrites the loser and is re-stamped with the current merge revision.
501 501 """
502 502 major_tt, major_value = major
503 503 minor_tt, minor_value = minor
504 504
505 505 if major_tt == minor_tt:
506 506 # if it comes from the same revision it must be the same value
507 507 assert major_value == minor_value
508 508 return PICK_EITHER, False
509 509 elif (
510 510 changes is not None
511 511 and minor_value is not None
512 512 and major_value is None
513 513 and dest in changes.salvaged
514 514 ):
515 515 # In this case, a deletion was reverted: the "alive" value overwrites
516 516 # the deleted one.
517 517 return PICK_MINOR, True
518 518 elif (
519 519 changes is not None
520 520 and major_value is not None
521 521 and minor_value is None
522 522 and dest in changes.salvaged
523 523 ):
524 524 # In this case, a deletion was reverted: the "alive" value overwrites
525 525 # the deleted one.
526 526 return PICK_MAJOR, True
527 527 elif isancestor(minor_tt, major_tt):
528 528 if changes is not None and dest in changes.merged:
529 529 # change to dest happened on the branch without copy-source change,
530 530 # so both sources are valid and "major" wins.
531 531 return PICK_MAJOR, True
532 532 else:
533 533 return PICK_MAJOR, False
534 534 elif isancestor(major_tt, minor_tt):
535 535 if changes is not None and dest in changes.merged:
536 536 # change to dest happened on the branch without copy-source change,
537 537 # so both sources are valid and "major" wins.
538 538 return PICK_MAJOR, True
539 539 else:
540 540 return PICK_MINOR, False
541 541 elif minor_value is None:
542 542 # in case of conflict, the "alive" side wins.
543 543 return PICK_MAJOR, True
544 544 elif major_value is None:
545 545 # in case of conflict, the "alive" side wins.
546 546 return PICK_MINOR, True
547 547 else:
548 548 # in case of conflict where both side are alive, major wins.
549 549 return PICK_MAJOR, True
550 550
551 551
552 552 def _revinfo_getter_extra(repo):
553 553 """return a function that return multiple data given a <rev>"i
554 554
555 555 * p1: revision number of first parent
556 556 * p2: revision number of first parent
557 557 * p1copies: mapping of copies from p1
558 558 * p2copies: mapping of copies from p2
559 559 * removed: a list of removed files
560 560 * ismerged: a callback to know if file was merged in that revision
561 561 """
562 562 cl = repo.changelog
563 563 parents = cl.parentrevs
564 564
565 565 def get_ismerged(rev):
566 566 ctx = repo[rev]
567 567
568 568 def ismerged(path):
569 569 if path not in ctx.files():
570 570 return False
571 571 fctx = ctx[path]
572 572 parents = fctx._filelog.parents(fctx._filenode)
573 573 nb_parents = 0
574 574 for n in parents:
575 575 if n != nullid:
576 576 nb_parents += 1
577 577 return nb_parents >= 2
578 578
579 579 return ismerged
580 580
581 581 def revinfo(rev):
582 582 p1, p2 = parents(rev)
583 583 ctx = repo[rev]
584 584 p1copies, p2copies = ctx._copies
585 585 removed = ctx.filesremoved()
586 586 return p1, p2, p1copies, p2copies, removed, get_ismerged(rev)
587 587
588 588 return revinfo
589 589
590 590
591 591 def _combine_changeset_copies_extra(
592 592 revs, children, targetrev, revinfo, match, isancestor
593 593 ):
594 594 """version of `_combine_changeset_copies` that works with the Google
595 595 specific "extra" based storage for copy information"""
596 596 all_copies = {}
597 597 alwaysmatch = match.always()
598 598 for r in revs:
599 599 copies = all_copies.pop(r, None)
600 600 if copies is None:
601 601 # this is a root
602 602 copies = {}
603 603 for i, c in enumerate(children[r]):
604 604 p1, p2, p1copies, p2copies, removed, ismerged = revinfo(c)
605 605 if r == p1:
606 606 parent = 1
607 607 childcopies = p1copies
608 608 else:
609 609 assert r == p2
610 610 parent = 2
611 611 childcopies = p2copies
612 612 if not alwaysmatch:
613 613 childcopies = {
614 614 dst: src for dst, src in childcopies.items() if match(dst)
615 615 }
616 616 newcopies = copies
617 617 if childcopies:
618 618 newcopies = copies.copy()
619 619 for dest, source in pycompat.iteritems(childcopies):
620 620 prev = copies.get(source)
621 621 if prev is not None and prev[1] is not None:
622 622 source = prev[1]
623 623 newcopies[dest] = (c, source)
624 624 assert newcopies is not copies
625 625 for f in removed:
626 626 if f in newcopies:
627 627 if newcopies is copies:
628 628 # copy on write to avoid affecting potential other
629 629 # branches. when there are no other branches, this
630 630 # could be avoided.
631 631 newcopies = copies.copy()
632 632 newcopies[f] = (c, None)
633 633 othercopies = all_copies.get(c)
634 634 if othercopies is None:
635 635 all_copies[c] = newcopies
636 636 else:
637 637 # we are the second parent to work on c, we need to merge our
638 638 # work with the other.
639 639 #
640 640 # In case of conflict, parent 1 takes precedence over parent 2.
641 641 # This is an arbitrary choice made anew when implementing
642 642 # changeset based copies. It was made without regard to
643 643 # potential filelog-related behavior.
644 644 if parent == 1:
645 645 _merge_copies_dict_extra(
646 646 othercopies, newcopies, isancestor, ismerged
647 647 )
648 648 else:
649 649 _merge_copies_dict_extra(
650 650 newcopies, othercopies, isancestor, ismerged
651 651 )
652 652 all_copies[c] = newcopies
653 653
654 654 final_copies = {}
655 655 for dest, (tt, source) in all_copies[targetrev].items():
656 656 if source is not None:
657 657 final_copies[dest] = source
658 658 return final_copies
659 659
660 660
661 661 def _merge_copies_dict_extra(minor, major, isancestor, ismerged):
662 662 """version of `_merge_copies_dict` that works with the Google
663 663 specific "extra" based storage for copy information"""
664 664 for dest, value in major.items():
665 665 other = minor.get(dest)
666 666 if other is None:
667 667 minor[dest] = value
668 668 else:
669 669 new_tt = value[0]
670 670 other_tt = other[0]
671 671 if value[1] == other[1]:
672 672 continue
673 673 # content from "major" wins, unless it is older
674 674 # than the branch point or there is a merge
675 675 if (
676 676 new_tt == other_tt
677 677 or not isancestor(new_tt, other_tt)
678 678 or ismerged(dest)
679 679 ):
680 680 minor[dest] = value
681 681
682 682
683 683 def _forwardcopies(a, b, base=None, match=None):
684 684 """find {dst@b: src@a} copy mapping where a is an ancestor of b"""
685 685
686 686 if base is None:
687 687 base = a
688 688 match = a.repo().narrowmatch(match)
689 689 # check for working copy
690 690 if b.rev() is None:
691 691 cm = _committedforwardcopies(a, b.p1(), base, match)
692 692 # combine copies from dirstate if necessary
693 693 copies = _chain(cm, _dirstatecopies(b._repo, match))
694 694 else:
695 695 copies = _committedforwardcopies(a, b, base, match)
696 696 return copies
697 697
698 698
699 699 def _backwardrenames(a, b, match):
700 700 if a._repo.ui.config(b'experimental', b'copytrace') == b'off':
701 701 return {}
702 702
703 703 # Even though we're not taking copies into account, 1:n rename situations
704 704 # can still exist (e.g. hg cp a b; hg mv a c). In those cases we
705 705 # arbitrarily pick one of the renames.
706 706 # We don't want to pass in "match" here, since that would filter
707 707 # the destination by it. Since we're reversing the copies, we want
708 708 # to filter the source instead.
709 709 f = _forwardcopies(b, a)
710 710 r = {}
711 711 for k, v in sorted(pycompat.iteritems(f)):
712 712 if match and not match(v):
713 713 continue
714 714 # remove copies
715 715 if v in a:
716 716 continue
717 717 r[v] = k
718 718 return r
719 719
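# Editor's sketch (plain dicts, not part of this changeset) of the reversal
# performed above: forward {dst: src} pairs become {src: dst}, and in a 1:n
# situation the iteration over sorted items makes the last destination win.
def _reverse_copies_example():
    forward = {b'b': b'a', b'c': b'a'}  # 'a' was copied to both 'b' and 'c'
    reversed_copies = {}
    for k, v in sorted(forward.items()):
        reversed_copies[v] = k
    assert reversed_copies == {b'a': b'c'}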
720 720
721 721 def pathcopies(x, y, match=None):
722 722 """find {dst@y: src@x} copy mapping for directed compare"""
723 723 repo = x._repo
724 724 debug = repo.ui.debugflag and repo.ui.configbool(b'devel', b'debug.copies')
725 725 if debug:
726 726 repo.ui.debug(
727 727 b'debug.copies: searching copies from %s to %s\n' % (x, y)
728 728 )
729 729 if x == y or not x or not y:
730 730 return {}
731 731 if y.rev() is None and x == y.p1():
732 732 if debug:
733 733 repo.ui.debug(b'debug.copies: search mode: dirstate\n')
734 734 # short-circuit to avoid issues with merge states
735 735 return _dirstatecopies(repo, match)
736 736 a = y.ancestor(x)
737 737 if a == x:
738 738 if debug:
739 739 repo.ui.debug(b'debug.copies: search mode: forward\n')
740 740 copies = _forwardcopies(x, y, match=match)
741 741 elif a == y:
742 742 if debug:
743 743 repo.ui.debug(b'debug.copies: search mode: backward\n')
744 744 copies = _backwardrenames(x, y, match=match)
745 745 else:
746 746 if debug:
747 747 repo.ui.debug(b'debug.copies: search mode: combined\n')
748 748 base = None
749 749 if a.rev() != nullrev:
750 750 base = x
751 751 copies = _chain(
752 752 _backwardrenames(x, a, match=match),
753 753 _forwardcopies(a, y, base, match=match),
754 754 )
755 755 _filter(x, y, copies)
756 756 return copies
757 757
758 758
759 759 def mergecopies(repo, c1, c2, base):
760 760 """
761 761 Finds moves and copies between context c1 and c2 that are relevant for
762 762 merging. 'base' will be used as the merge base.
763 763
764 764 Copytracing is used in commands like rebase, merge, unshelve, etc to merge
765 765 files that were moved/ copied in one merge parent and modified in another.
766 766 For example:
767 767
768 768 o ---> 4 another commit
769 769 |
770 770 | o ---> 3 commit that modifies a.txt
771 771 | /
772 772 o / ---> 2 commit that moves a.txt to b.txt
773 773 |/
774 774 o ---> 1 merge base
775 775
776 776 If we try to rebase revision 3 on revision 4, since there is no a.txt in
777 777 revision 4, and if the user has copytrace disabled, we print the following
778 778 message:
779 779
780 780 ```other changed <file> which local deleted```
781 781
782 782 Returns a tuple where:
783 783
784 784 "branch_copies" an instance of branch_copies.
785 785
786 786 "diverge" is a mapping of source name -> list of destination names
787 787 for divergent renames.
788 788
789 789 This function calls different copytracing algorithms based on config.
790 790 """
791 791 # avoid silly behavior for update from empty dir
792 792 if not c1 or not c2 or c1 == c2:
793 793 return branch_copies(), branch_copies(), {}
794 794
795 795 narrowmatch = c1.repo().narrowmatch()
796 796
797 797 # avoid silly behavior for parent -> working dir
798 798 if c2.node() is None and c1.node() == repo.dirstate.p1():
799 799 return (
800 800 branch_copies(_dirstatecopies(repo, narrowmatch)),
801 801 branch_copies(),
802 802 {},
803 803 )
804 804
805 805 copytracing = repo.ui.config(b'experimental', b'copytrace')
806 806 if stringutil.parsebool(copytracing) is False:
807 807 # stringutil.parsebool() returns None when it is unable to parse the
808 808 # value, so we keep copytracing enabled in such cases
809 809 return branch_copies(), branch_copies(), {}
810 810
811 811 if usechangesetcentricalgo(repo):
812 812 # The heuristics don't make sense when we need changeset-centric algos
813 813 return _fullcopytracing(repo, c1, c2, base)
814 814
815 815 # Copy trace disabling is explicitly below the node == p1 logic above
816 816 # because the logic above is required for a simple copy to be kept across a
817 817 # rebase.
818 818 if copytracing == b'heuristics':
819 819 # Do full copytracing if only non-public revisions are involved as
820 820 # that will be fast enough and will also cover the copies which could
821 821 # be missed by heuristics
822 822 if _isfullcopytraceable(repo, c1, base):
823 823 return _fullcopytracing(repo, c1, c2, base)
824 824 return _heuristicscopytracing(repo, c1, c2, base)
825 825 else:
826 826 return _fullcopytracing(repo, c1, c2, base)
827 827
828 828
829 829 def _isfullcopytraceable(repo, c1, base):
830 830 """Checks that if base, source and destination are all no-public branches,
831 831 if yes let's use the full copytrace algorithm for increased capabilities
832 832 since it will be fast enough.
833 833
834 834 `experimental.copytrace.sourcecommitlimit` can be used to set a limit for
835 835 number of changesets from c1 to base such that if the number of changesets
836 836 is more than the limit, the full copytracing algorithm won't be used.
837 837 """
838 838 if c1.rev() is None:
839 839 c1 = c1.p1()
840 840 if c1.mutable() and base.mutable():
841 841 sourcecommitlimit = repo.ui.configint(
842 842 b'experimental', b'copytrace.sourcecommitlimit'
843 843 )
844 844 commits = len(repo.revs(b'%d::%d', base.rev(), c1.rev()))
845 845 return commits < sourcecommitlimit
846 846 return False
847 847
848 848
849 849 def _checksinglesidecopies(
850 850 src, dsts1, m1, m2, mb, c2, base, copy, renamedelete
851 851 ):
852 852 if src not in m2:
853 853 # deleted on side 2
854 854 if src not in m1:
855 855 # renamed on side 1, deleted on side 2
856 856 renamedelete[src] = dsts1
857 857 elif src not in mb:
858 858 # Work around the "short-circuit to avoid issues with merge states"
859 859 # thing in pathcopies(): pathcopies(x, y) can return a copy where the
860 860 # destination doesn't exist in y.
861 861 pass
862 862 elif mb[src] != m2[src] and not _related(c2[src], base[src]):
863 863 return
864 864 elif mb[src] != m2[src] or mb.flags(src) != m2.flags(src):
865 865 # modified on side 2
866 866 for dst in dsts1:
867 867 copy[dst] = src
868 868
869 869
870 870 class branch_copies(object):
871 871 """Information about copies made on one side of a merge/graft.
872 872
873 873 "copy" is a mapping from destination name -> source name,
874 874 where source is in c1 and destination is in c2 or vice-versa.
875 875
876 876 "movewithdir" is a mapping from source name -> destination name,
877 877 where the file at source present in one context but not the other
878 878 needs to be moved to destination by the merge process, because the
879 879 other context moved the directory it is in.
880 880
881 881 "renamedelete" is a mapping of source name -> list of destination
882 882 names for files deleted in c1 that were renamed in c2 or vice-versa.
883 883
884 884 "dirmove" is a mapping of detected source dir -> destination dir renames.
885 885 This is needed for handling changes to new files previously grafted into
886 886 renamed directories.
887 887 """
888 888
889 889 def __init__(
890 890 self, copy=None, renamedelete=None, dirmove=None, movewithdir=None
891 891 ):
892 892 self.copy = {} if copy is None else copy
893 893 self.renamedelete = {} if renamedelete is None else renamedelete
894 894 self.dirmove = {} if dirmove is None else dirmove
895 895 self.movewithdir = {} if movewithdir is None else movewithdir
896 896
897 897 def __repr__(self):
898 898 return '<branch_copies\n copy=%r\n renamedelete=%r\n dirmove=%r\n movewithdir=%r\n>' % (
899 899 self.copy,
900 900 self.renamedelete,
901 901 self.dirmove,
902 902 self.movewithdir,
903 903 )
904 904
905 905
906 906 def _fullcopytracing(repo, c1, c2, base):
907 907 """The full copytracing algorithm which finds all the new files that were
908 908 added from merge base up to the top commit and for each file it checks if
909 909 this file was copied from another file.
910 910
911 911 This is pretty slow when a lot of changesets are involved but will track all
912 912 the copies.
913 913 """
914 914 m1 = c1.manifest()
915 915 m2 = c2.manifest()
916 916 mb = base.manifest()
917 917
918 918 copies1 = pathcopies(base, c1)
919 919 copies2 = pathcopies(base, c2)
920 920
921 921 if not (copies1 or copies2):
922 922 return branch_copies(), branch_copies(), {}
923 923
924 924 inversecopies1 = {}
925 925 inversecopies2 = {}
926 926 for dst, src in copies1.items():
927 927 inversecopies1.setdefault(src, []).append(dst)
928 928 for dst, src in copies2.items():
929 929 inversecopies2.setdefault(src, []).append(dst)
930 930
931 931 copy1 = {}
932 932 copy2 = {}
933 933 diverge = {}
934 934 renamedelete1 = {}
935 935 renamedelete2 = {}
936 936 allsources = set(inversecopies1) | set(inversecopies2)
937 937 for src in allsources:
938 938 dsts1 = inversecopies1.get(src)
939 939 dsts2 = inversecopies2.get(src)
940 940 if dsts1 and dsts2:
941 941 # copied/renamed on both sides
942 942 if src not in m1 and src not in m2:
943 943 # renamed on both sides
944 944 dsts1 = set(dsts1)
945 945 dsts2 = set(dsts2)
946 946 # If there's some overlap in the rename destinations, we
947 947 # consider it not divergent. For example, if side 1 copies 'a'
948 948 # to 'b' and 'c' and deletes 'a', and side 2 copies 'a' to 'c'
949 949 # and 'd' and deletes 'a'.
950 950 if dsts1 & dsts2:
951 951 for dst in dsts1 & dsts2:
952 952 copy1[dst] = src
953 953 copy2[dst] = src
954 954 else:
955 955 diverge[src] = sorted(dsts1 | dsts2)
956 956 elif src in m1 and src in m2:
957 957 # copied on both sides
958 958 dsts1 = set(dsts1)
959 959 dsts2 = set(dsts2)
960 960 for dst in dsts1 & dsts2:
961 961 copy1[dst] = src
962 962 copy2[dst] = src
963 963 # TODO: Handle cases where it was renamed on one side and copied
964 964 # on the other side
965 965 elif dsts1:
966 966 # copied/renamed only on side 1
967 967 _checksinglesidecopies(
968 968 src, dsts1, m1, m2, mb, c2, base, copy1, renamedelete1
969 969 )
970 970 elif dsts2:
971 971 # copied/renamed only on side 2
972 972 _checksinglesidecopies(
973 973 src, dsts2, m2, m1, mb, c1, base, copy2, renamedelete2
974 974 )
975 975
976 976 # find interesting file sets from manifests
977 977 cache = []
978 978
979 979 def _get_addedfiles(idx):
980 980 if not cache:
981 981 addedinm1 = m1.filesnotin(mb, repo.narrowmatch())
982 982 addedinm2 = m2.filesnotin(mb, repo.narrowmatch())
983 983 u1 = sorted(addedinm1 - addedinm2)
984 984 u2 = sorted(addedinm2 - addedinm1)
985 985 cache.extend((u1, u2))
986 986 return cache[idx]
987 987
988 988 u1fn = lambda: _get_addedfiles(0)
989 989 u2fn = lambda: _get_addedfiles(1)
990 990 if repo.ui.debugflag:
991 991 u1 = u1fn()
992 992 u2 = u2fn()
993 993
994 994 header = b" unmatched files in %s"
995 995 if u1:
996 996 repo.ui.debug(
997 997 b"%s:\n %s\n" % (header % b'local', b"\n ".join(u1))
998 998 )
999 999 if u2:
1000 1000 repo.ui.debug(
1001 1001 b"%s:\n %s\n" % (header % b'other', b"\n ".join(u2))
1002 1002 )
1003 1003
1004 1004 renamedeleteset = set()
1005 1005 divergeset = set()
1006 1006 for dsts in diverge.values():
1007 1007 divergeset.update(dsts)
1008 1008 for dsts in renamedelete1.values():
1009 1009 renamedeleteset.update(dsts)
1010 1010 for dsts in renamedelete2.values():
1011 1011 renamedeleteset.update(dsts)
1012 1012
1013 1013 repo.ui.debug(
1014 1014 b" all copies found (* = to merge, ! = divergent, "
1015 1015 b"% = renamed and deleted):\n"
1016 1016 )
1017 1017 for side, copies in ((b"local", copies1), (b"remote", copies2)):
1018 1018 if not copies:
1019 1019 continue
1020 1020 repo.ui.debug(b" on %s side:\n" % side)
1021 1021 for f in sorted(copies):
1022 1022 note = b""
1023 1023 if f in copy1 or f in copy2:
1024 1024 note += b"*"
1025 1025 if f in divergeset:
1026 1026 note += b"!"
1027 1027 if f in renamedeleteset:
1028 1028 note += b"%"
1029 1029 repo.ui.debug(
1030 1030 b" src: '%s' -> dst: '%s' %s\n" % (copies[f], f, note)
1031 1031 )
1032 1032 del renamedeleteset
1033 1033 del divergeset
1034 1034
1035 1035 repo.ui.debug(b" checking for directory renames\n")
1036 1036
1037 1037 dirmove1, movewithdir2 = _dir_renames(repo, c1, copy1, copies1, u2fn)
1038 1038 dirmove2, movewithdir1 = _dir_renames(repo, c2, copy2, copies2, u1fn)
1039 1039
1040 1040 branch_copies1 = branch_copies(copy1, renamedelete1, dirmove1, movewithdir1)
1041 1041 branch_copies2 = branch_copies(copy2, renamedelete2, dirmove2, movewithdir2)
1042 1042
1043 1043 return branch_copies1, branch_copies2, diverge
1044 1044
1045 1045
1046 1046 def _dir_renames(repo, ctx, copy, fullcopy, addedfilesfn):
1047 1047 """Finds moved directories and files that should move with them.
1048 1048
1049 1049 ctx: the context for one of the sides
1050 1050 copy: files copied on the same side (as ctx)
1051 1051 fullcopy: files copied on the same side (as ctx), including those that
1052 1052 merge.manifestmerge() won't care about
1053 1053 addedfilesfn: function returning added files on the other side (compared to
1054 1054 ctx)
1055 1055 """
1056 1056 # generate a directory move map
1057 1057 invalid = set()
1058 1058 dirmove = {}
1059 1059
1060 1060 # examine each file copy for a potential directory move, which is
1061 1061 # when all the files in a directory are moved to a new directory
1062 1062 for dst, src in pycompat.iteritems(fullcopy):
1063 1063 dsrc, ddst = pathutil.dirname(src), pathutil.dirname(dst)
1064 1064 if dsrc in invalid:
1065 1065 # already seen to be uninteresting
1066 1066 continue
1067 1067 elif ctx.hasdir(dsrc) and ctx.hasdir(ddst):
1068 1068 # directory wasn't entirely moved locally
1069 1069 invalid.add(dsrc)
1070 1070 elif dsrc in dirmove and dirmove[dsrc] != ddst:
1071 1071 # files from the same directory moved to two different places
1072 1072 invalid.add(dsrc)
1073 1073 else:
1074 1074 # looks good so far
1075 1075 dirmove[dsrc] = ddst
1076 1076
1077 1077 for i in invalid:
1078 1078 if i in dirmove:
1079 1079 del dirmove[i]
1080 1080 del invalid
1081 1081
1082 1082 if not dirmove:
1083 1083 return {}, {}
1084 1084
1085 1085 dirmove = {k + b"/": v + b"/" for k, v in pycompat.iteritems(dirmove)}
1086 1086
1087 1087 for d in dirmove:
1088 1088 repo.ui.debug(
1089 1089 b" discovered dir src: '%s' -> dst: '%s'\n" % (d, dirmove[d])
1090 1090 )
1091 1091
1092 1092 movewithdir = {}
1093 1093 # check unaccounted nonoverlapping files against directory moves
1094 1094 for f in addedfilesfn():
1095 1095 if f not in fullcopy:
1096 1096 for d in dirmove:
1097 1097 if f.startswith(d):
1098 1098 # new file added in a directory that was moved, move it
1099 1099 df = dirmove[d] + f[len(d) :]
1100 1100 if df not in copy:
1101 1101 movewithdir[f] = df
1102 1102 repo.ui.debug(
1103 1103 b" pending file src: '%s' -> dst: '%s'\n"
1104 1104 % (f, df)
1105 1105 )
1106 1106 break
1107 1107
1108 1108 return dirmove, movewithdir
1109 1109
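# Editor's sketch (toy paths, without the hasdir() checks above): when every
# copy out of a directory lands in the same new directory, the per-file
# copies collapse into a single directory rename.
def _dirmove_example():
    fullcopy = {b'newdir/x': b'olddir/x', b'newdir/y': b'olddir/y'}
    invalid, dirmove = set(), {}
    for dst, src in fullcopy.items():
        dsrc = src.rsplit(b'/', 1)[0]
        ddst = dst.rsplit(b'/', 1)[0]
        if dsrc in invalid:
            continue
        elif dsrc in dirmove and dirmove[dsrc] != ddst:
            invalid.add(dsrc)  # same source dir moved to two places
        else:
            dirmove[dsrc] = ddst
    for i in invalid:
        dirmove.pop(i, None)
    assert dirmove == {b'olddir': b'newdir'}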
1110 1110
1111 1111 def _heuristicscopytracing(repo, c1, c2, base):
1112 1112 """Fast copytracing using filename heuristics
1113 1113
1114 1114 Assumes that moves or renames are of following two types:
1115 1115
1116 1116 1) Inside a directory only (same directory name but different filenames)
1117 1117 2) Move from one directory to another
1118 1118 (same filenames but different directory names)
1119 1119
1120 1120 Works only when there are no merge commits in the "source branch".
1121 1121 The source branch is the commits from base up to c2, not including base.
1122 1122 
1123 1123 If a merge is involved it falls back to _fullcopytracing().
1124 1124
1125 1125 Can be used by setting the following config:
1126 1126
1127 1127 [experimental]
1128 1128 copytrace = heuristics
1129 1129
1130 1130 In some cases the copy/move candidates found by heuristics can be very large
1131 1131 in number and that will make the algorithm slow. The number of possible
1132 1132 candidates to check can be limited by using the config
1133 1133 `experimental.copytrace.movecandidateslimit` which defaults to 100.
1134 1134 """
1135 1135
1136 1136 if c1.rev() is None:
1137 1137 c1 = c1.p1()
1138 1138 if c2.rev() is None:
1139 1139 c2 = c2.p1()
1140 1140
1141 1141 changedfiles = set()
1142 1142 m1 = c1.manifest()
1143 1143 if not repo.revs(b'%d::%d', base.rev(), c2.rev()):
1144 1144 # If base is not in c2 branch, we switch to fullcopytracing
1145 1145 repo.ui.debug(
1146 1146 b"switching to full copytracing as base is not "
1147 1147 b"an ancestor of c2\n"
1148 1148 )
1149 1149 return _fullcopytracing(repo, c1, c2, base)
1150 1150
1151 1151 ctx = c2
1152 1152 while ctx != base:
1153 1153 if len(ctx.parents()) == 2:
1154 1154 # To keep things simple let's not handle merges
1155 1155 repo.ui.debug(b"switching to full copytracing because of merges\n")
1156 1156 return _fullcopytracing(repo, c1, c2, base)
1157 1157 changedfiles.update(ctx.files())
1158 1158 ctx = ctx.p1()
1159 1159
1160 1160 copies2 = {}
1161 1161 cp = _forwardcopies(base, c2)
1162 1162 for dst, src in pycompat.iteritems(cp):
1163 1163 if src in m1:
1164 1164 copies2[dst] = src
1165 1165
1166 1166 # file is missing if it isn't present in the destination, but is present in
1167 1167 # the base and present in the source.
1168 1168 # Presence in the base is important to exclude added files, presence in the
1169 1169 # source is important to exclude removed files.
1170 1170 filt = lambda f: f not in m1 and f in base and f in c2
1171 1171 missingfiles = [f for f in changedfiles if filt(f)]
1172 1172
1173 1173 copies1 = {}
1174 1174 if missingfiles:
1175 1175 basenametofilename = collections.defaultdict(list)
1176 1176 dirnametofilename = collections.defaultdict(list)
1177 1177
1178 1178 for f in m1.filesnotin(base.manifest()):
1179 1179 basename = os.path.basename(f)
1180 1180 dirname = os.path.dirname(f)
1181 1181 basenametofilename[basename].append(f)
1182 1182 dirnametofilename[dirname].append(f)
1183 1183
1184 1184 for f in missingfiles:
1185 1185 basename = os.path.basename(f)
1186 1186 dirname = os.path.dirname(f)
1187 1187 samebasename = basenametofilename[basename]
1188 1188 samedirname = dirnametofilename[dirname]
1189 1189 movecandidates = samebasename + samedirname
1190 1190 # f is guaranteed to be present in c2, that's why
1191 1191 # c2.filectx(f) won't fail
1192 1192 f2 = c2.filectx(f)
1193 1193 # we can have a lot of candidates, which can slow down the heuristics;
1194 1194 # a config value limits the number of candidate moves to check
1195 1195 maxcandidates = repo.ui.configint(
1196 1196 b'experimental', b'copytrace.movecandidateslimit'
1197 1197 )
1198 1198
1199 1199 if len(movecandidates) > maxcandidates:
1200 1200 repo.ui.status(
1201 1201 _(
1202 1202 b"skipping copytracing for '%s', more "
1203 1203 b"candidates than the limit: %d\n"
1204 1204 )
1205 1205 % (f, len(movecandidates))
1206 1206 )
1207 1207 continue
1208 1208
1209 1209 for candidate in movecandidates:
1210 1210 f1 = c1.filectx(candidate)
1211 1211 if _related(f1, f2):
1212 1212 # if there are a few related copies then we'll merge
1213 1213 # changes into all of them. This matches the behaviour
1214 1214 # of upstream copytracing
1215 1215 copies1[candidate] = f
1216 1216
1217 1217 return branch_copies(copies1), branch_copies(copies2), {}
1218 1218
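# Editor's sketch (toy data, not part of this changeset; relies on the
# module-level collections/os imports) of the two candidate buckets built
# above for a file changed in c2 but missing from c1: files added in c1 with
# the same basename, plus those in the same directory.
def _movecandidates_example():
    added_in_c1 = [b'src/main.py', b'docs/readme.txt']
    basenametofilename = collections.defaultdict(list)
    dirnametofilename = collections.defaultdict(list)
    for f in added_in_c1:
        basenametofilename[os.path.basename(f)].append(f)
        dirnametofilename[os.path.dirname(f)].append(f)
    missing = b'old/main.py'
    candidates = (
        basenametofilename[os.path.basename(missing)]
        + dirnametofilename[os.path.dirname(missing)]
    )
    assert candidates == [b'src/main.py']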
1219 1219
1220 1220 def _related(f1, f2):
1221 1221 """return True if f1 and f2 filectx have a common ancestor
1222 1222
1223 1223 Walk back to common ancestor to see if the two files originate
1224 1224 from the same file. Since workingfilectx's rev() is None it messes
1225 1225 up the integer comparison logic, hence the pre-step check for
1226 1226 None (f1 and f2 can only be workingfilectx's initially).
1227 1227 """
1228 1228
1229 1229 if f1 == f2:
1230 1230 return True # a match
1231 1231
1232 1232 g1, g2 = f1.ancestors(), f2.ancestors()
1233 1233 try:
1234 1234 f1r, f2r = f1.linkrev(), f2.linkrev()
1235 1235
1236 1236 if f1r is None:
1237 1237 f1 = next(g1)
1238 1238 if f2r is None:
1239 1239 f2 = next(g2)
1240 1240
1241 1241 while True:
1242 1242 f1r, f2r = f1.linkrev(), f2.linkrev()
1243 1243 if f1r > f2r:
1244 1244 f1 = next(g1)
1245 1245 elif f2r > f1r:
1246 1246 f2 = next(g2)
1247 1247 else: # f1 and f2 point to files in the same linkrev
1248 1248 return f1 == f2 # true if they point to the same file
1249 1249 except StopIteration:
1250 1250 return False
1251 1251
1252 1252
1253 1253 def graftcopies(wctx, ctx, base):
1254 1254 """reproduce copies between base and ctx in the wctx
1255 1255
1256 1256 Unlike mergecopies(), this function will only consider copies between base
1257 1257 and ctx; it will ignore copies between base and wctx. Also unlike
1258 1258 mergecopies(), this function will apply copies to the working copy (instead
1259 1259 of just returning information about the copies). That makes it cheaper
1260 1260 (especially in the common case of base==ctx.p1()) and useful also when
1261 1261 experimental.copytrace=off.
1262 1262
1263 1263 merge.update() will have already marked most copies, but it will only
1264 1264 mark copies if it thinks the source files are related (see
1265 1265 merge._related()). It will also not mark copies if the file wasn't modified
1266 1266 on the local side. This function adds the copies that were "missed"
1267 1267 by merge.update().
1268 1268 """
1269 1269 new_copies = pathcopies(base, ctx)
1270 1270 parent = wctx.p1()
1271 1271 _filter(parent, wctx, new_copies)
1272 1272 # Extra filtering to drop copy information for files that existed before
1273 1273 # the graft. This is to handle the case of grafting a rename onto a commit
1274 1274 # that already has the rename. Otherwise the presence of copy information
1275 1275 # would result in the creation of an empty commit where we would prefer to
1276 1276 # not create one.
1277 1277 for dest, __ in list(new_copies.items()):
1278 1278 if dest in parent:
1279 1279 del new_copies[dest]
1280 1280 for dst, src in pycompat.iteritems(new_copies):
1281 1281 wctx[dst].markcopied(src)
@@ -1,949 +1,890 b''
1 1 use crate::utils::hg_path::HgPath;
2 2 use crate::utils::hg_path::HgPathBuf;
3 3 use crate::Revision;
4 4 use crate::NULL_REVISION;
5 5
6 6 use im_rc::ordmap::DiffItem;
7 7 use im_rc::ordmap::Entry;
8 8 use im_rc::ordmap::OrdMap;
9 9
10 10 use std::cmp::Ordering;
11 11 use std::collections::HashMap;
12 12 use std::collections::HashSet;
13 13 use std::convert::TryInto;
14 14
15 15 pub type PathCopies = HashMap<HgPathBuf, HgPathBuf>;
16 16
17 17 type PathToken = usize;
18 18
19 19 #[derive(Clone, Debug, PartialEq)]
20 20 struct CopySource {
21 21 /// revision at which the copy information was added
22 22 rev: Revision,
23 23 /// the copy source (set to None in case of deletion of the associated
24 24 /// key)
25 25 path: Option<PathToken>,
26 26 /// a set of previous `CopySource.rev` value directly or indirectly
27 27 /// overwritten by this one.
28 28 overwritten: HashSet<Revision>,
29 29 }
30 30
31 31 impl CopySource {
32 32 /// create a new CopySource
33 33 ///
34 34 /// Use this when no previous copy source existed.
35 35 fn new(rev: Revision, path: Option<PathToken>) -> Self {
36 36 Self {
37 37 rev,
38 38 path,
39 39 overwritten: HashSet::new(),
40 40 }
41 41 }
42 42
43 43 /// create a new CopySource from merging two others
44 44 ///
45 45 /// Use this when merging two InternalPathCopies requires active merging of
46 46 /// some entries.
47 47 fn new_from_merge(rev: Revision, winner: &Self, loser: &Self) -> Self {
48 48 let mut overwritten = HashSet::new();
49 49 overwritten.extend(winner.overwritten.iter().copied());
50 50 overwritten.extend(loser.overwritten.iter().copied());
51 51 overwritten.insert(winner.rev);
52 52 overwritten.insert(loser.rev);
53 53 Self {
54 54 rev,
55 55 path: winner.path,
56 56 overwritten,
57 57 }
58 58 }
59 59
60 60 /// Update the value of a pre-existing CopySource
61 61 ///
62 62 /// Use this when recording copy information from parent → child edges
63 63 fn overwrite(&mut self, rev: Revision, path: Option<PathToken>) {
64 64 self.overwritten.insert(self.rev);
65 65 self.rev = rev;
66 66 self.path = path;
67 67 }
68 68
69 69 /// Mark pre-existing copy information as "dropped" by a file deletion
70 70 ///
71 71 /// Use this when recording copy information from parent → child edges
72 72 fn mark_delete(&mut self, rev: Revision) {
73 73 self.overwritten.insert(self.rev);
74 74 self.rev = rev;
75 75 self.path = None;
76 76 }
77 77
78 78 fn is_overwritten_by(&self, other: &Self) -> bool {
79 79 other.overwritten.contains(&self.rev)
80 80 }
81 81 }
82 82
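// Editor's sketch (illustrative only, not part of this changeset): how the
// `overwritten` set records history when copy information is updated along a
// parent → child edge.
#[allow(dead_code)]
fn copy_source_overwrite_example() {
    let mut source = CopySource::new(1, Some(0));
    let stale = source.clone();
    source.overwrite(2, Some(1)); // rev 2 replaces the information from rev 1
    assert!(stale.is_overwritten_by(&source));
    assert!(!source.is_overwritten_by(&stale));
}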
83 83 /// maps CopyDestination to Copy Source (+ a "timestamp" for the operation)
84 84 type InternalPathCopies = OrdMap<PathToken, CopySource>;
85 85
86 86 /// holds parent 1, parent 2 and the relevant files' actions.
87 87 pub type RevInfo<'a> = (Revision, Revision, ChangedFiles<'a>);
88 88
89 89 /// represents the files affected by a changeset
90 90 ///
91 91 /// This holds a subset of mercurial.metadata.ChangingFiles as we do not need
92 92 /// all the data categories tracked by it.
95 95 pub struct ChangedFiles<'a> {
96 96 nb_items: u32,
97 97 index: &'a [u8],
98 98 data: &'a [u8],
99 99 }
100 100
101 101 /// Represent active changes that affect the copy tracing.
102 102 enum Action<'a> {
103 103 /// The parent → child edge is removing a file
104 104 ///
105 105 /// (actually, this could be the edge from the other parent, but it does
106 106 /// not matter)
107 107 Removed(&'a HgPath),
108 108 /// The parent → child edge introduces copy information between (dest,
109 109 /// source)
110 110 Copied(&'a HgPath, &'a HgPath),
111 111 }
112 112
113 113 /// This expresses the possible "special" cases we can get in a merge
114 114 ///
115 115 /// See mercurial/metadata.py for details on these values.
116 116 #[derive(PartialEq)]
117 117 enum MergeCase {
118 118 /// Merged: file had history on both side that needed to be merged
119 119 Merged,
120 120 /// Salvaged: file was candidate for deletion, but survived the merge
121 121 Salvaged,
122 122 /// Normal: Not one of the two cases above
123 123 Normal,
124 124 }
125 125
126 126 type FileChange<'a> = (u8, &'a HgPath, &'a HgPath);
127 127
128 128 const EMPTY: &[u8] = b"";
129 129 const COPY_MASK: u8 = 3;
130 130 const P1_COPY: u8 = 2;
131 131 const P2_COPY: u8 = 3;
132 132 const ACTION_MASK: u8 = 28;
133 133 const REMOVED: u8 = 12;
134 134 const MERGED: u8 = 8;
135 135 const SALVAGED: u8 = 16;
136 136
137 137 impl<'a> ChangedFiles<'a> {
138 138 const INDEX_START: usize = 4;
139 139 const ENTRY_SIZE: u32 = 9;
140 140 const FILENAME_START: u32 = 1;
141 141 const COPY_SOURCE_START: u32 = 5;
142 142
143 143 pub fn new(data: &'a [u8]) -> Self {
144 144 assert!(
145 145 data.len() >= 4,
146 146 "data size ({}) is too small to contain the header (4)",
147 147 data.len()
148 148 );
149 149 let nb_items_raw: [u8; 4] = (&data[0..=3])
150 150 .try_into()
151 151 .expect("failed to turn 4 bytes into 4 bytes");
152 152 let nb_items = u32::from_be_bytes(nb_items_raw);
153 153
154 154 let index_size = (nb_items * Self::ENTRY_SIZE) as usize;
155 155 let index_end = Self::INDEX_START + index_size;
156 156
157 157 assert!(
158 158 data.len() >= index_end,
159 159 "data size ({}) is too small to fit the index_data ({})",
160 160 data.len(),
161 161 index_end
162 162 );
163 163
164 164 let ret = ChangedFiles {
165 165 nb_items,
166 166 index: &data[Self::INDEX_START..index_end],
167 167 data: &data[index_end..],
168 168 };
169 169 let max_data = ret.filename_end(nb_items - 1) as usize;
170 170 assert!(
171 171 ret.data.len() >= max_data,
172 172 "data size ({}) is too small to fit all data ({})",
173 173 data.len(),
174 174 index_end + max_data
175 175 );
176 176 ret
177 177 }
178 178
179 179 pub fn new_empty() -> Self {
180 180 ChangedFiles {
181 181 nb_items: 0,
182 182 index: EMPTY,
183 183 data: EMPTY,
184 184 }
185 185 }
186 186
187 187 /// internal function to return an individual entry at a given index
188 188 fn entry(&'a self, idx: u32) -> FileChange<'a> {
189 189 if idx >= self.nb_items {
190 190 panic!(
191 191 "index for entry is higher that the number of file {} >= {}",
192 192 idx, self.nb_items
193 193 )
194 194 }
195 195 let flags = self.flags(idx);
196 196 let filename = self.filename(idx);
197 197 let copy_idx = self.copy_idx(idx);
198 198 let copy_source = self.filename(copy_idx);
199 199 (flags, filename, copy_source)
200 200 }
201 201
202 202 /// internal function to return the filename of the entry at a given index
203 203 fn filename(&self, idx: u32) -> &HgPath {
204 204 let filename_start;
205 205 if idx == 0 {
206 206 filename_start = 0;
207 207 } else {
208 208 filename_start = self.filename_end(idx - 1)
209 209 }
210 210 let filename_end = self.filename_end(idx);
211 211 let filename_start = filename_start as usize;
212 212 let filename_end = filename_end as usize;
213 213 HgPath::new(&self.data[filename_start..filename_end])
214 214 }
215 215
216 216 /// internal function to return the flag field of the entry at a given
217 217 /// index
218 218 fn flags(&self, idx: u32) -> u8 {
219 219 let idx = idx as usize;
220 220 self.index[idx * (Self::ENTRY_SIZE as usize)]
221 221 }
222 222
223 223 /// internal function to return the end of a filename part at a given index
224 224 fn filename_end(&self, idx: u32) -> u32 {
225 225 let start = (idx * Self::ENTRY_SIZE) + Self::FILENAME_START;
226 226 let end = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
227 227 let start = start as usize;
228 228 let end = end as usize;
229 229 let raw = (&self.index[start..end])
230 230 .try_into()
231 231 .expect("failed to turn 4 bytes into 4 bytes");
232 232 u32::from_be_bytes(raw)
233 233 }
234 234
235 235 /// internal function to return index of the copy source of the entry at a
236 236 /// given index
237 237 fn copy_idx(&self, idx: u32) -> u32 {
238 238 let start = (idx * Self::ENTRY_SIZE) + Self::COPY_SOURCE_START;
239 239 let end = (idx + 1) * Self::ENTRY_SIZE;
240 240 let start = start as usize;
241 241 let end = end as usize;
242 242 let raw = (&self.index[start..end])
243 243 .try_into()
244 244 .expect("failed to turn 4 bytes into 4 bytes");
245 245 u32::from_be_bytes(raw)
246 246 }
247 247
248 248 /// Return an iterator over all the `Action` in this instance.
249 249 fn iter_actions(&self, parent: Parent) -> ActionsIterator {
250 250 ActionsIterator {
251 251 changes: &self,
252 252 parent: parent,
253 253 current: 0,
254 254 }
255 255 }
256 256
257 257 /// return the MergeCase value associated with a filename
258 258 fn get_merge_case(&self, path: &HgPath) -> MergeCase {
259 259 if self.nb_items == 0 {
260 260 return MergeCase::Normal;
261 261 }
262 262 let mut low_part = 0;
263 263 let mut high_part = self.nb_items;
264 264
265 265 while low_part < high_part {
266 266 let cursor = (low_part + high_part - 1) / 2;
267 267 let (flags, filename, _source) = self.entry(cursor);
268 268 match path.cmp(filename) {
269 269 Ordering::Less => low_part = cursor + 1,
270 270 Ordering::Greater => high_part = cursor,
271 271 Ordering::Equal => {
272 272 return match flags & ACTION_MASK {
273 273 MERGED => MergeCase::Merged,
274 274 SALVAGED => MergeCase::Salvaged,
275 275 _ => MergeCase::Normal,
276 276 };
277 277 }
278 278 }
279 279 }
280 280 MergeCase::Normal
281 281 }
282 282 }
283 283
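// Editor's sketch (hypothetical blob, not part of this changeset): the
// sidedata layout parsed above is a big-endian item count, then 9-byte index
// entries (1 flag byte, a 4-byte filename end offset, a 4-byte copy-source
// index), then the concatenated filenames.
#[allow(dead_code)]
fn changed_files_layout_example() {
    let mut blob: Vec<u8> = Vec::new();
    blob.extend_from_slice(&1u32.to_be_bytes()); // nb_items = 1
    blob.push(REMOVED); // flags: this entry removes the file
    blob.extend_from_slice(&1u32.to_be_bytes()); // filename ends at offset 1
    blob.extend_from_slice(&0u32.to_be_bytes()); // copy source: entry 0 (itself)
    blob.extend_from_slice(b"a"); // data section: the filename
    let changes = ChangedFiles::new(&blob);
    assert_eq!(changes.filename(0).as_bytes(), b"a");
    assert_eq!(changes.flags(0), REMOVED);
}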
284 /// A struct responsible for answering "is X an ancestor of Y" quickly
285 ///
286 /// The structure will delegate ancestors call to a callback, and cache the
287 /// result.
288 #[derive(Debug)]
289 struct AncestorOracle<'a, A: Fn(Revision, Revision) -> bool> {
290 inner: &'a A,
291 pairs: HashMap<(Revision, Revision), bool>,
292 }
293
294 impl<'a, A: Fn(Revision, Revision) -> bool> AncestorOracle<'a, A> {
295 fn new(func: &'a A) -> Self {
296 Self {
297 inner: func,
298 pairs: HashMap::default(),
299 }
300 }
301
302 fn record_overwrite(&mut self, anc: Revision, desc: Revision) {
303 self.pairs.insert((anc, desc), true);
304 }
305
306 /// returns `true` if `anc` is an ancestor of `desc`, `false` otherwise
307 fn is_overwrite(&mut self, anc: Revision, desc: Revision) -> bool {
308 if anc > desc {
309 false
310 } else if anc == desc {
311 true
312 } else {
313 if let Some(b) = self.pairs.get(&(anc, desc)) {
314 *b
315 } else {
316 let b = (self.inner)(anc, desc);
317 self.pairs.insert((anc, desc), b);
318 b
319 }
320 }
321 }
322 }
323
324 284 struct ActionsIterator<'a> {
325 285 changes: &'a ChangedFiles<'a>,
326 286 parent: Parent,
327 287 current: u32,
328 288 }
329 289
330 290 impl<'a> Iterator for ActionsIterator<'a> {
331 291 type Item = Action<'a>;
332 292
333 293 fn next(&mut self) -> Option<Action<'a>> {
334 294 let copy_flag = match self.parent {
335 295 Parent::FirstParent => P1_COPY,
336 296 Parent::SecondParent => P2_COPY,
337 297 };
338 298 while self.current < self.changes.nb_items {
339 299 let (flags, file, source) = self.changes.entry(self.current);
340 300 self.current += 1;
341 301 if (flags & ACTION_MASK) == REMOVED {
342 302 return Some(Action::Removed(file));
343 303 }
344 304 let copy = flags & COPY_MASK;
345 305 if copy == copy_flag {
346 306 return Some(Action::Copied(file, source));
347 307 }
348 308 }
349 309 return None;
350 310 }
351 311 }
352 312
353 313 /// A small struct whose purpose is to ensure the lifetime of bytes
354 314 /// referenced in ChangedFiles
355 315 ///
356 316 /// It is passed to the RevInfoMaker callback, which can assign any necessary
357 317 /// content to the `data` attribute. The copy tracing code is responsible for
358 318 /// keeping the DataHolder alive at least as long as the ChangedFiles object.
359 319 pub struct DataHolder<D> {
360 320 /// The RevInfoMaker callback should assign the data referenced by the
361 321 /// ChangedFiles struct it returns to this attribute. The DataHolder
362 322 /// lifetime will be at least as long as the ChangedFiles one.
363 323 pub data: Option<D>,
364 324 }
365 325
366 326 pub type RevInfoMaker<'a, D> =
367 327 Box<dyn for<'r> Fn(Revision, &'r mut DataHolder<D>) -> RevInfo<'r> + 'a>;
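
The ownership contract described above is easier to see in miniature: the callback parks its owned buffer in the holder the caller supplied, then returns a view borrowed from that holder, so the borrow remains valid for as long as the caller keeps the holder alive. A simplified sketch, with `String` standing in for the Python-owned bytes:

    struct Holder<D> {
        data: Option<D>,
    }

    // The callback stores owned data in the caller-provided holder and hands
    // back a slice borrowed from it; the slice lives as long as the holder.
    fn make_info<'r>(holder: &'r mut Holder<String>) -> &'r str {
        holder.data = Some(String::from("raw copy data"));
        holder.data.as_ref().expect("just stored").as_str()
    }

    fn main() {
        let mut holder = Holder { data: None };
        let view = make_info(&mut holder);
        assert_eq!(view, "raw copy data");
    }
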
368 328
369 329 /// enum used to carry information about the parent β†’ child currently processed
370 330 #[derive(Copy, Clone, Debug)]
371 331 enum Parent {
372 332 /// The `p1(x) β†’ x` edge
373 333 FirstParent,
374 334 /// The `p2(x) β†’ x` edge
375 335 SecondParent,
376 336 }
377 337
378 338 /// A small "tokenizer" responsible for turning full HgPath values into
379 339 /// lighter PathTokens
380 340 ///
381 341 /// Dealing with small objects, like integers, is much faster, so HgPath
382 342 /// inputs are turned into integer "PathTokens" and converted back at the end.
383 343 #[derive(Clone, Debug, Default)]
384 344 struct TwoWayPathMap {
385 345 token: HashMap<HgPathBuf, PathToken>,
386 346 path: Vec<HgPathBuf>,
387 347 }
388 348
389 349 impl TwoWayPathMap {
390 350 fn tokenize(&mut self, path: &HgPath) -> PathToken {
391 351 match self.token.get(path) {
392 352 Some(a) => *a,
393 353 None => {
394 354 let a = self.token.len();
395 355 let buf = path.to_owned();
396 356 self.path.push(buf.clone());
397 357 self.token.insert(buf, a);
398 358 a
399 359 }
400 360 }
401 361 }
402 362
403 363 fn untokenize(&self, token: PathToken) -> &HgPathBuf {
404 364 assert!(token < self.path.len(), "Unknown token: {}", token);
405 365 &self.path[token]
406 366 }
407 367 }
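
This is the classic string-interning layout: a map for the path → token direction and a parallel vector for token → path, so the hot loops can hash and compare plain integers instead of full paths. A minimal standalone version of the same idea, with `String` in place of `HgPathBuf`:

    use std::collections::HashMap;

    #[derive(Default)]
    struct Interner {
        token: HashMap<String, usize>,
        path: Vec<String>,
    }

    impl Interner {
        /// Return the token for `path`, allocating a new one on first sight.
        fn tokenize(&mut self, path: &str) -> usize {
            match self.token.get(path) {
                Some(&t) => t,
                None => {
                    let t = self.path.len();
                    self.path.push(path.to_owned());
                    self.token.insert(path.to_owned(), t);
                    t
                }
            }
        }

        /// Resolve a token back to its path.
        fn untokenize(&self, token: usize) -> &str {
            &self.path[token]
        }
    }

    fn main() {
        let mut map = Interner::default();
        let a = map.tokenize("dir/a.txt");
        assert_eq!(map.tokenize("dir/a.txt"), a); // stable on repeat
        assert_eq!(map.untokenize(a), "dir/a.txt");
    }
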
408 368
409 369 /// Same as mercurial.copies._combine_changeset_copies, but in Rust.
410 370 ///
411 371 /// Arguments are:
412 372 ///
413 373 /// revs: all revisions to be considered
414 374 /// children: a {parent → [children]} mapping
415 375 /// target_rev: the final revision we are combining copies to
416 376 /// rev_info(rev): callback to get revision information:
417 377 /// * first parent
418 378 /// * second parent
419 379 /// * ChangedFiles
420 /// isancestors(low_rev, high_rev): callback to check if a revision is an
421 ///     ancestor of another
422 pub fn combine_changeset_copies<A: Fn(Revision, Revision) -> bool, D>(
382 pub fn combine_changeset_copies<D>(
423 383 revs: Vec<Revision>,
424 384 mut children_count: HashMap<Revision, usize>,
425 385 target_rev: Revision,
426 386 rev_info: RevInfoMaker<D>,
427 is_ancestor: &A,
428 387 ) -> PathCopies {
429 388 let mut all_copies = HashMap::new();
430 let mut oracle = AncestorOracle::new(is_ancestor);
431 389
432 390 let mut path_map = TwoWayPathMap::default();
433 391
434 392 for rev in revs {
435 393 let mut d: DataHolder<D> = DataHolder { data: None };
436 394 let (p1, p2, changes) = rev_info(rev, &mut d);
437 395
438 396 // We will chain the copies information accumulated for the parent with
439 397 // the individual copies information of the current revision, creating a
440 398 // new TimeStampedPath for each `rev` → `child` vertex.
441 399 let mut copies: Option<InternalPathCopies> = None;
442 400 if p1 != NULL_REVISION {
443 401 // Retrieve data computed in a previous iteration
444 402 let parent_copies = get_and_clean_parent_copies(
445 403 &mut all_copies,
446 404 &mut children_count,
447 405 p1,
448 406 );
449 407 if let Some(parent_copies) = parent_copies {
450 408 // combine it with data for that revision
451 409 let vertex_copies = add_from_changes(
452 410 &mut path_map,
453 &mut oracle,
454 411 &parent_copies,
455 412 &changes,
456 413 Parent::FirstParent,
457 414 rev,
458 415 );
459 416 // keep that data around for potential later combination
460 417 copies = Some(vertex_copies);
461 418 }
462 419 }
463 420 if p2 != NULL_REVISION {
464 421 // Retrieve data computed in a previous iteration
465 422 let parent_copies = get_and_clean_parent_copies(
466 423 &mut all_copies,
467 424 &mut children_count,
468 425 p2,
469 426 );
470 427 if let Some(parent_copies) = parent_copies {
471 428 // combine it with data for that revision
472 429 let vertex_copies = add_from_changes(
473 430 &mut path_map,
474 &mut oracle,
475 431 &parent_copies,
476 432 &changes,
477 433 Parent::SecondParent,
478 434 rev,
479 435 );
480 436
481 437 copies = match copies {
482 438 None => Some(vertex_copies),
483 439 // A merge has two parents and needs to combine their
484 440 // copy information.
485 441 //
486 442 // If we got data from both parents, we need to combine
487 443 // them.
488 444 Some(copies) => Some(merge_copies_dict(
489 445 &path_map,
490 446 rev,
491 447 vertex_copies,
492 448 copies,
493 449 &changes,
494 &mut oracle,
495 450 )),
496 451 };
497 452 }
498 453 }
499 454 match copies {
500 455 Some(copies) => {
501 456 all_copies.insert(rev, copies);
502 457 }
503 458 _ => {}
504 459 }
505 460 }
506 461
507 462 // Drop internal information (like the timestamp) and return the final
508 463 // mapping.
509 464 let tt_result = all_copies
510 465 .remove(&target_rev)
511 466 .expect("target revision was not processed");
512 467 let mut result = PathCopies::default();
513 468 for (dest, tt_source) in tt_result {
514 469 if let Some(path) = tt_source.path {
515 470 let path_dest = path_map.untokenize(dest).to_owned();
516 471 let path_path = path_map.untokenize(path).to_owned();
517 472 result.insert(path_dest, path_path);
518 473 }
519 474 }
520 475 result
521 476 }
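
The `children_count` argument is essentially a reference count: for each revision in the walk, how many children will later come asking for its copy data. Assuming the caller already has `(rev, p1, p2)` triples for the revisions to process, the map can be derived in one pass; the sketch below uses `i32` revisions with `-1` as the null parent, mirroring Mercurial's usual `Revision`/`NULL_REVISION` convention:

    use std::collections::HashMap;

    type Revision = i32;
    const NULL_REVISION: Revision = -1;

    /// Count, for each parent in the walk, how many children reference it.
    fn children_count(
        triples: &[(Revision, Revision, Revision)],
    ) -> HashMap<Revision, usize> {
        let mut count = HashMap::new();
        for &(_rev, p1, p2) in triples {
            for &p in [p1, p2].iter() {
                if p != NULL_REVISION {
                    *count.entry(p).or_insert(0) += 1;
                }
            }
        }
        count
    }

    fn main() {
        // 0 is the root; 3 merges 1 and 2.
        let triples = [(1, 0, -1), (2, 0, -1), (3, 1, 2)];
        let count = children_count(&triples);
        assert_eq!(count[&0], 2);
        assert_eq!(count[&1], 1);
        assert_eq!(count[&2], 1);
    }
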
522 477
523 478 /// fetch previously computed information
524 479 ///
525 480 /// If no other children are expected to need this information, we drop it from
526 481 /// the cache.
527 482 ///
528 483 /// If parent is not part of the set we are expected to walk, return None.
529 484 fn get_and_clean_parent_copies(
530 485 all_copies: &mut HashMap<Revision, InternalPathCopies>,
531 486 children_count: &mut HashMap<Revision, usize>,
532 487 parent_rev: Revision,
533 488 ) -> Option<InternalPathCopies> {
534 489 let count = children_count.get_mut(&parent_rev)?;
535 490 *count -= 1;
536 491 if *count == 0 {
537 492 match all_copies.remove(&parent_rev) {
538 493 Some(c) => Some(c),
539 494 None => Some(InternalPathCopies::default()),
540 495 }
541 496 } else {
542 497 match all_copies.get(&parent_rev) {
543 498 Some(c) => Some(c.clone()),
544 499 None => Some(InternalPathCopies::default()),
545 500 }
546 501 }
547 502 }
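
The point of the count is to move rather than copy whenever possible: the cached value is `remove`d, transferring ownership, once the last expected child shows up, and merely `clone`d while more children are still due. The same pattern in isolation (a sketch with `String` values and `u32` keys):

    use std::collections::HashMap;

    /// Fetch `key`'s cached value, evicting it once the last expected
    /// reader has claimed it; `None` means the key is not tracked.
    fn get_and_clean(
        cache: &mut HashMap<u32, String>,
        readers_left: &mut HashMap<u32, usize>,
        key: u32,
    ) -> Option<String> {
        let count = readers_left.get_mut(&key)?;
        *count -= 1;
        if *count == 0 {
            cache.remove(&key) // last reader: move the value out
        } else {
            cache.get(&key).cloned() // more readers due: hand out a copy
        }
    }

    fn main() {
        let mut cache = HashMap::from([(7, String::from("copies of rev 7"))]);
        let mut readers_left = HashMap::from([(7, 2usize)]);
        assert!(get_and_clean(&mut cache, &mut readers_left, 7).is_some());
        assert!(get_and_clean(&mut cache, &mut readers_left, 7).is_some());
        assert!(cache.is_empty()); // evicted after the last reader
    }
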
548 503
549 504 /// Combine ChangedFiles with some existing PathCopies information and return
550 505 /// the result
551 fn add_from_changes<A: Fn(Revision, Revision) -> bool>(
506 fn add_from_changes(
552 507 path_map: &mut TwoWayPathMap,
553 oracle: &mut AncestorOracle<A>,
554 508 base_copies: &InternalPathCopies,
555 509 changes: &ChangedFiles,
556 510 parent: Parent,
557 511 current_rev: Revision,
558 512 ) -> InternalPathCopies {
559 513 let mut copies = base_copies.clone();
560 514 for action in changes.iter_actions(parent) {
561 515 match action {
562 516 Action::Copied(path_dest, path_source) => {
563 517 let dest = path_map.tokenize(path_dest);
564 518 let source = path_map.tokenize(path_source);
565 519 let entry;
566 520 if let Some(v) = base_copies.get(&source) {
567 521 entry = match &v.path {
568 522 Some(path) => Some((*(path)).to_owned()),
569 523 None => Some(source.to_owned()),
570 524 }
571 525 } else {
572 526 entry = Some(source.to_owned());
573 527 }
574 528 // Each new entry is introduced by the child; we
575 529 // record this information as we will need it to make
576 530 // the right decision when merging conflicting copy
577 531 // information. See merge_copies_dict for details.
578 532 match copies.entry(dest) {
579 533 Entry::Vacant(slot) => {
580 534 let ttpc = CopySource::new(current_rev, entry);
581 535 slot.insert(ttpc);
582 536 }
583 537 Entry::Occupied(mut slot) => {
584 538 let ttpc = slot.get_mut();
585 oracle.record_overwrite(ttpc.rev, current_rev);
586 539 ttpc.overwrite(current_rev, entry);
587 540 }
588 541 }
589 542 }
590 543 Action::Removed(deleted_path) => {
591 544 // We must drop copy information for removed files.
592 545 //
593 546 // We need to explicitly record them as dropped to
594 547 // propagate this information when merging two
595 548 // InternalPathCopies objects.
596 549 let deleted = path_map.tokenize(deleted_path);
597 550 copies.entry(deleted).and_modify(|old| {
598 oracle.record_overwrite(old.rev, current_rev);
599 551 old.mark_delete(current_rev);
600 552 });
601 553 }
602 554 }
603 555 }
604 556 copies
605 557 }
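
The `copies.entry(dest)` match above is the standard `std::collections::hash_map::Entry` idiom for "insert or update with a single lookup". A stripped-down sketch, recording which revision last wrote each destination:

    use std::collections::hash_map::Entry;
    use std::collections::HashMap;

    /// Record that `rev` (re)introduced `dest`, with a single map lookup.
    fn record(copies: &mut HashMap<String, u32>, dest: &str, rev: u32) {
        match copies.entry(dest.to_owned()) {
            Entry::Vacant(slot) => {
                slot.insert(rev); // first sighting of this destination
            }
            Entry::Occupied(mut slot) => {
                *slot.get_mut() = rev; // overwrite the previous writer
            }
        }
    }

    fn main() {
        let mut copies = HashMap::new();
        record(&mut copies, "dir/a.txt", 3);
        record(&mut copies, "dir/a.txt", 5);
        assert_eq!(copies["dir/a.txt"], 5);
    }
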
606 558
607 559 /// merge two copies-mapping together, minor and major
608 560 ///
609 561 /// In case of conflict, value from "major" will be picked, unless in some
610 562 /// cases. See inline documentation for details.
611 fn merge_copies_dict<A: Fn(Revision, Revision) -> bool>(
563 fn merge_copies_dict(
612 564 path_map: &TwoWayPathMap,
613 565 current_merge: Revision,
614 566 mut minor: InternalPathCopies,
615 567 mut major: InternalPathCopies,
616 568 changes: &ChangedFiles,
617 oracle: &mut AncestorOracle<A>,
618 569 ) -> InternalPathCopies {
619 570 // This closure exists as a temporary helper while multiple developers
620 571 // are actively working on this code. Feel free to re-inline it once
621 572 // this code is more settled.
622 let cmp_value = |oracle: &mut AncestorOracle<A>,
623 dest: &PathToken,
624 src_minor: &CopySource,
625 src_major: &CopySource| {
573 let cmp_value =
574 |dest: &PathToken, src_minor: &CopySource, src_major: &CopySource| {
626 575 compare_value(
627 576 path_map,
628 577 current_merge,
629 578 changes,
630 oracle,
631 579 dest,
632 580 src_minor,
633 581 src_major,
634 582 )
635 583 };
636 584 if minor.is_empty() {
637 585 major
638 586 } else if major.is_empty() {
639 587 minor
640 588 } else if minor.len() * 2 < major.len() {
641 589 // Let's say we are merging two InternalPathCopies instances A and B.
642 590 //
643 591 // If A contains N items, the merge result will never contain more
644 592 // than N values different from the ones in A.
645 593 //
646 594 // If B contains M items, with M > N, the merge result will always
647 595 // contain a minimum of M - N values different from the ones in
648 596 // A.
649 597 //
650 598 // As a result, if N < (M-N), we know that simply iterating over A will
651 599 // yield fewer differences than iterating over the difference
652 600 // between A and B.
653 601 //
654 602 // This helps performance a lot in cases where a tiny
655 603 // InternalPathCopies is merged with a much larger one.
656 604 for (dest, src_minor) in minor {
657 605 let src_major = major.get(&dest);
658 606 match src_major {
659 607 None => {
660 608 major.insert(dest, src_minor);
661 609 }
662 610 Some(src_major) => {
663 611 let (pick, overwrite) =
664 cmp_value(oracle, &dest, &src_minor, src_major);
612 cmp_value(&dest, &src_minor, src_major);
665 613 if overwrite {
666 oracle.record_overwrite(src_minor.rev, current_merge);
667 oracle.record_overwrite(src_major.rev, current_merge);
668 614 let src = match pick {
669 615 MergePick::Major => CopySource::new_from_merge(
670 616 current_merge,
671 617 src_major,
672 618 &src_minor,
673 619 ),
674 620 MergePick::Minor => CopySource::new_from_merge(
675 621 current_merge,
676 622 &src_minor,
677 623 src_major,
678 624 ),
679 625 MergePick::Any => CopySource::new_from_merge(
680 626 current_merge,
681 627 src_major,
682 628 &src_minor,
683 629 ),
684 630 };
685 631 major.insert(dest, src);
686 632 } else {
687 633 match pick {
688 634 MergePick::Any | MergePick::Major => None,
689 635 MergePick::Minor => major.insert(dest, src_minor),
690 636 };
691 637 }
692 638 }
693 639 };
694 640 }
695 641 major
696 642 } else if major.len() * 2 < minor.len() {
697 643 // This uses the same rationale as the previous block.
698 644 // (Check the previous block's documentation for details.)
699 645 for (dest, src_major) in major {
700 646 let src_minor = minor.get(&dest);
701 647 match src_minor {
702 648 None => {
703 649 minor.insert(dest, src_major);
704 650 }
705 651 Some(src_minor) => {
706 652 let (pick, overwrite) =
707 cmp_value(oracle, &dest, src_minor, &src_major);
653 cmp_value(&dest, src_minor, &src_major);
708 654 if overwrite {
709 oracle.record_overwrite(src_minor.rev, current_merge);
710 oracle.record_overwrite(src_major.rev, current_merge);
711 655 let src = match pick {
712 656 MergePick::Major => CopySource::new_from_merge(
713 657 current_merge,
714 658 &src_major,
715 659 src_minor,
716 660 ),
717 661 MergePick::Minor => CopySource::new_from_merge(
718 662 current_merge,
719 663 src_minor,
720 664 &src_major,
721 665 ),
722 666 MergePick::Any => CopySource::new_from_merge(
723 667 current_merge,
724 668 &src_major,
725 669 src_minor,
726 670 ),
727 671 };
728 672 minor.insert(dest, src);
729 673 } else {
730 674 match pick {
731 675 MergePick::Any | MergePick::Minor => None,
732 676 MergePick::Major => minor.insert(dest, src_major),
733 677 };
734 678 }
735 679 }
736 680 };
737 681 }
738 682 minor
739 683 } else {
740 684 let mut override_minor = Vec::new();
741 685 let mut override_major = Vec::new();
742 686
743 687 let mut to_major = |k: &PathToken, v: &CopySource| {
744 688 override_major.push((k.clone(), v.clone()))
745 689 };
746 690 let mut to_minor = |k: &PathToken, v: &CopySource| {
747 691 override_minor.push((k.clone(), v.clone()))
748 692 };
749 693
750 694 // The diff function leverages detection of identical subparts when
751 695 // minor and major have some common ancestors. This makes it very
752 696 // fast in most cases.
753 697 //
754 698 // In cases where the two maps are vastly different in size, the current
755 699 // approach is still slowish because the iteration will walk over
756 700 // all the "exclusive" content of the larger one. This situation can be
757 701 // frequent when the subgraph of revisions we are processing has a lot
758 702 // of roots. Each root adds its own fully new map to the mix (and
759 703 // likely a small map, if the path from the root to the "main path" is
760 704 // small).
761 705 //
762 706 // We could do better by detecting such situations and processing them
763 707 // differently.
764 708 for d in minor.diff(&major) {
765 709 match d {
766 710 DiffItem::Add(k, v) => to_minor(k, v),
767 711 DiffItem::Remove(k, v) => to_major(k, v),
768 712 DiffItem::Update { old, new } => {
769 713 let (dest, src_major) = new;
770 714 let (_, src_minor) = old;
771 715 let (pick, overwrite) =
772 cmp_value(oracle, dest, src_minor, src_major);
716 cmp_value(dest, src_minor, src_major);
773 717 if overwrite {
774 oracle.record_overwrite(src_minor.rev, current_merge);
775 oracle.record_overwrite(src_major.rev, current_merge);
776 718 let src = match pick {
777 719 MergePick::Major => CopySource::new_from_merge(
778 720 current_merge,
779 721 src_major,
780 722 src_minor,
781 723 ),
782 724 MergePick::Minor => CopySource::new_from_merge(
783 725 current_merge,
784 726 src_minor,
785 727 src_major,
786 728 ),
787 729 MergePick::Any => CopySource::new_from_merge(
788 730 current_merge,
789 731 src_major,
790 732 src_minor,
791 733 ),
792 734 };
793 735 to_minor(dest, &src);
794 736 to_major(dest, &src);
795 737 } else {
796 738 match pick {
797 739 MergePick::Major => to_minor(dest, src_major),
798 740 MergePick::Minor => to_major(dest, src_minor),
800 742 // If the two entries are identical, no need to do
801 743 // anything (but diff should not have yielded them)
801 743 MergePick::Any => unreachable!(),
802 744 }
803 745 }
804 746 }
805 747 };
806 748 }
807 749
808 750 let updates;
809 751 let mut result;
810 752 if override_major.is_empty() {
811 753 result = major
812 754 } else if override_minor.is_empty() {
813 755 result = minor
814 756 } else {
815 757 if override_minor.len() < override_major.len() {
816 758 updates = override_minor;
817 759 result = minor;
818 760 } else {
819 761 updates = override_major;
820 762 result = major;
821 763 }
822 764 for (k, v) in updates {
823 765 result.insert(k, v);
824 766 }
825 767 }
826 768 result
827 769 }
828 770 }
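
The size heuristic used twice above generalizes: when one map is much smaller than the other, folding the small one into the large one keeps the work proportional to the small side. A stripped-down sketch of such an asymmetric merge; keeping the value already present stands in for the real `MergePick` arbitration:

    use std::collections::HashMap;

    /// Merge `minor` into `major`, iterating over the smaller side only.
    /// On conflict the value already in `major` wins, standing in for the
    /// real MergePick arbitration.
    fn merge_small_into_large(
        minor: HashMap<String, u32>,
        mut major: HashMap<String, u32>,
    ) -> HashMap<String, u32> {
        debug_assert!(minor.len() <= major.len());
        for (dest, src) in minor {
            major.entry(dest).or_insert(src);
        }
        major
    }

    fn main() {
        let minor = HashMap::from([("a".to_owned(), 1), ("b".to_owned(), 2)]);
        let major = HashMap::from([
            ("b".to_owned(), 9),
            ("c".to_owned(), 3),
            ("d".to_owned(), 4),
        ]);
        let merged = merge_small_into_large(minor, major);
        assert_eq!(merged["a"], 1); // only in minor: inserted
        assert_eq!(merged["b"], 9); // conflict: major's value kept
        assert_eq!(merged.len(), 4);
    }
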
829 771
830 772 /// represent the side that should prevail when merging two
831 773 /// InternalPathCopies
832 774 enum MergePick {
833 775 /// The "major" (p1) side prevails
834 776 Major,
835 777 /// The "minor" (p2) side prevails
836 778 Minor,
837 779 /// Any side could be used (because they are the same)
838 780 Any,
839 781 }
840 782
841 783 /// decide which side prevails in case of conflicting values
842 784 #[allow(clippy::if_same_then_else)]
843 fn compare_value<A: Fn(Revision, Revision) -> bool>(
785 fn compare_value(
844 786 path_map: &TwoWayPathMap,
845 787 current_merge: Revision,
846 788 changes: &ChangedFiles,
847 oracle: &mut AncestorOracle<A>,
848 789 dest: &PathToken,
849 790 src_minor: &CopySource,
850 791 src_major: &CopySource,
851 792 ) -> (MergePick, bool) {
852 793 if src_major.rev == current_merge {
853 794 if src_minor.rev == current_merge {
854 795 if src_major.path.is_none() {
855 796 // We cannot get different copy information for both p1 and p2
856 797 // from the same revision, unless this was a
857 798 // deletion.
858 799 //
859 800 // However, the deletion might overwrite different data on each
860 801 // branch.
861 802 let need_over = src_major.overwritten != src_minor.overwritten;
862 803 (MergePick::Any, need_over)
863 804 } else {
864 805 unreachable!();
865 806 }
866 807 } else {
867 808 // The last value comes from the current merge; this value -will-
868 809 // win eventually.
869 810 (MergePick::Major, true)
870 811 }
871 812 } else if src_minor.rev == current_merge {
872 813 // The last value comes from the current merge; this value -will-
873 814 // win eventually.
874 815 (MergePick::Minor, true)
875 816 } else if src_major.path == src_minor.path {
876 817 // we have the same value, but from another source;
877 818 if src_major.rev == src_minor.rev {
878 819 // If the two entry are identical, they are both valid
879 820 debug_assert!(src_minor.overwritten == src_major.overwritten);
880 821 (MergePick::Any, false)
881 822 } else if src_major.is_overwritten_by(src_minor) {
882 823 (MergePick::Minor, false)
883 824 } else if src_minor.is_overwritten_by(src_major) {
884 825 (MergePick::Major, false)
885 826 } else {
886 827 (MergePick::Any, true)
887 828 }
888 829 } else if src_major.rev == src_minor.rev {
889 830 // We cannot get copy information for both p1 and p2 in the
890 831 // same rev. So this is the same value.
891 832 unreachable!(
892 833 "conflicting information from p1 and p2 in the same revision"
893 834 );
894 835 } else {
895 836 let dest_path = path_map.untokenize(*dest);
896 837 let action = changes.get_merge_case(dest_path);
897 838 if src_minor.path.is_some()
898 839 && src_major.path.is_none()
899 840 && action == MergeCase::Salvaged
900 841 {
901 842 // If the file is "deleted" in the major side but was
902 843 // salvaged by the merge, we keep the minor side alive
903 844 (MergePick::Minor, true)
904 845 } else if src_major.path.is_some()
905 846 && src_minor.path.is_none()
906 847 && action == MergeCase::Salvaged
907 848 {
908 849 // If the file is "deleted" in the minor side but was
909 850 // salvaged by the merge, unconditionally preserve the
910 851 // major side.
911 852 (MergePick::Major, true)
912 853 } else if src_minor.is_overwritten_by(src_major) {
913 854 // The information from the minor version is strictly older than
914 855 // the major version's
915 856 if action == MergeCase::Merged {
916 857 // If the file was actively merged, it means some non-copy
917 858 // activity happened on the other branch. That
918 859 // means the older copy information is still relevant.
919 860 //
920 861 // The major side wins such conflict.
921 862 (MergePick::Major, true)
922 863 } else {
923 864 // No activity on the minor branch, pick the newer one.
924 865 (MergePick::Major, false)
925 866 }
926 867 } else if src_major.is_overwritten_by(src_minor) {
927 868 if action == MergeCase::Merged {
928 869 // If the file was actively merged, it means some non-copy
929 870 // activity happened on the other branch. That
930 871 // means the older copy information is still relevant.
931 872 //
932 873 // The major side wins such conflict.
933 874 (MergePick::Major, true)
934 875 } else {
935 876 // No activity on the minor branch, pick the newer one.
936 877 (MergePick::Minor, false)
937 878 }
938 879 } else if src_minor.path.is_none() {
939 880 // the minor side has no relevant information, pick the alive one
940 881 (MergePick::Major, true)
941 882 } else if src_major.path.is_none() {
942 883 // the major side has no relevant information, pick the alive one
943 884 (MergePick::Minor, true)
944 885 } else {
945 886 // by default the major side wins
946 887 (MergePick::Major, true)
947 888 }
948 889 }
949 890 }
@@ -1,148 +1,123 b''
1 1 use cpython::ObjectProtocol;
2 use cpython::PyBool;
3 2 use cpython::PyBytes;
4 3 use cpython::PyDict;
5 4 use cpython::PyList;
6 5 use cpython::PyModule;
7 6 use cpython::PyObject;
8 7 use cpython::PyResult;
9 8 use cpython::PyTuple;
10 9 use cpython::Python;
11 10
12 11 use hg::copy_tracing::combine_changeset_copies;
13 12 use hg::copy_tracing::ChangedFiles;
14 13 use hg::copy_tracing::DataHolder;
15 14 use hg::copy_tracing::RevInfo;
16 15 use hg::copy_tracing::RevInfoMaker;
17 16 use hg::Revision;
18 17
19 18 /// Combines copies information contained in revisions `revs` to build a copy
20 19 /// map.
21 20 ///
22 21 /// See mercurial/copies.py for details
23 22 pub fn combine_changeset_copies_wrapper(
24 23 py: Python,
25 24 revs: PyList,
26 25 children_count: PyDict,
27 26 target_rev: Revision,
28 27 rev_info: PyObject,
29 is_ancestor: PyObject,
30 28 ) -> PyResult<PyDict> {
31 29 let revs: PyResult<_> =
32 30 revs.iter(py).map(|r| Ok(r.extract(py)?)).collect();
33 31
34 // Wrap the `is_ancestor` python callback as a Rust closure
35 //
36 // No errors are expected from the Python side, and they will should only
37 // happens in case of programing error or severe data corruption. Such
38 // errors will raise panic and the rust-cpython harness will turn them into
39 // Python exception.
40 let is_ancestor_wrap = |anc: Revision, desc: Revision| -> bool {
41 is_ancestor
42 .call(py, (anc, desc), None)
43 .expect(
44 "rust-copy-tracing: python call to `is_ancestor` \
45 failed",
46 )
47 .cast_into::<PyBool>(py)
48 .expect(
49 "rust-copy-tracing: python call to `is_ancestor` \
50 returned unexpected non-Bool value",
51 )
52 .is_true()
53 };
54
55 32 // Wrap the `rev_info_maker` python callback as a Rust closure
56 33 //
57 34 // No errors are expected from the Python side, and they should only
58 35 // happen in case of a programming error or severe data corruption. Such
59 36 // errors will raise a panic and the rust-cpython harness will turn them
60 37 // into a Python exception.
61 38 let rev_info_maker: RevInfoMaker<PyBytes> =
62 39 Box::new(|rev: Revision, d: &mut DataHolder<PyBytes>| -> RevInfo {
63 40 let res: PyTuple = rev_info
64 41 .call(py, (rev,), None)
65 42 .expect("rust-copy-tracing: python call to `rev_info` failed")
66 43 .cast_into(py)
67 44 .expect(
68 45 "rust-copy_tracing: python call to `rev_info` returned \
69 46 unexpected non-Tuple value",
70 47 );
71 48 let p1 = res.get_item(py, 0).extract(py).expect(
72 49 "rust-copy-tracing: rev_info return is invalid, first item \
73 50 is not a revision",
74 51 );
75 52 let p2 = res.get_item(py, 1).extract(py).expect(
76 53 "rust-copy-tracing: rev_info return is invalid, second item \
77 54 is not a revision",
78 55 );
79 56
80 57 let files = match res.get_item(py, 2).extract::<PyBytes>(py) {
81 58 Ok(raw) => {
82 59 // Give responsibility for the raw bytes' lifetime to
83 60 // hg-core
84 61 d.data = Some(raw);
85 62 let addrs = d.data.as_ref().expect(
86 63 "rust-copy-tracing: failed to get a reference to the \
87 64 raw bytes for copy data").data(py);
88 65 ChangedFiles::new(addrs)
89 66 }
90 67 // value was presumably None, meaning there was no copy data.
91 68 Err(_) => ChangedFiles::new_empty(),
92 69 };
93 70
94 71 (p1, p2, files)
95 72 });
96 73 let children_count: PyResult<_> = children_count
97 74 .items(py)
98 75 .iter()
99 76 .map(|(k, v)| Ok((k.extract(py)?, v.extract(py)?)))
100 77 .collect();
101 78
102 79 let res = combine_changeset_copies(
103 80 revs?,
104 81 children_count?,
105 82 target_rev,
106 83 rev_info_maker,
107 &is_ancestor_wrap,
108 84 );
109 85 let out = PyDict::new(py);
110 86 for (dest, source) in res.into_iter() {
111 87 out.set_item(
112 88 py,
113 89 PyBytes::new(py, &dest.into_vec()),
114 90 PyBytes::new(py, &source.into_vec()),
115 91 )?;
116 92 }
117 93 Ok(out)
118 94 }
119 95
120 96 /// Create the module, with `__package__` given from parent
121 97 pub fn init_module(py: Python, package: &str) -> PyResult<PyModule> {
122 98 let dotted_name = &format!("{}.copy_tracing", package);
123 99 let m = PyModule::new(py, dotted_name)?;
124 100
125 101 m.add(py, "__package__", package)?;
126 102 m.add(py, "__doc__", "Copy tracing - Rust implementation")?;
127 103
128 104 m.add(
129 105 py,
130 106 "combine_changeset_copies",
131 107 py_fn!(
132 108 py,
133 109 combine_changeset_copies_wrapper(
134 110 revs: PyList,
135 111 children: PyDict,
136 112 target_rev: Revision,
137 rev_info: PyObject,
138 is_ancestor: PyObject
113 rev_info: PyObject
139 114 )
140 115 ),
141 116 )?;
142 117
143 118 let sys = PyModule::import(py, "sys")?;
144 119 let sys_modules: PyDict = sys.get(py, "modules")?.extract(py)?;
145 120 sys_modules.set_item(py, dotted_name, &m)?;
146 121
147 122 Ok(m)
148 123 }